]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/PGMonitor.cc:warning if pg not scrubbed 6440/head
author Michal Jarzabek <stiopa@gmail.com>
Sat, 23 Jan 2016 12:08:57 +0000 (12:08 +0000)
committer Michal Jarzabek <stiopa@gmail.com>
Sat, 23 Jan 2016 16:22:26 +0000 (16:22 +0000)
Added 2 flags for displaying a warning when pg is unscrubbed:
mon_warn_not_scrubbed
mon_warn_not_deep_scrubbed

Each flag specifies how far past mon_scrub_interval a pg may remain
unscrubbed before it is reported. Unscrubbed pgs are then shown by:
ceph -s                 - number of unscrubbed pgs
ceph health             - number of unscrubbed pgs
ceph health detail      - detailed list of unscrubbed pgs

Both flags are set to 0 by default, which means they are turned off.

Fixes: #13142
Signed-off-by: Michal Jarzabek <stiopa@gmail.com>
src/common/config_opts.h
src/mon/MDSMonitor.cc
src/mon/MDSMonitor.h
src/mon/Monitor.cc
src/mon/MonmapMonitor.cc
src/mon/MonmapMonitor.h
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/mon/PGMonitor.cc
src/mon/PGMonitor.h
src/mon/PaxosService.h

index dcbb7e027c26aad8cbf2afe850253874618be21c..178cec639e426d9299df0456fecced3115e896c3 100644 (file)
@@ -273,6 +273,8 @@ OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0)
 OPTION(mon_data_avail_crit, OPT_INT, 5)
 OPTION(mon_data_avail_warn, OPT_INT, 30)
 OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes)
+OPTION(mon_warn_not_scrubbed, OPT_INT, 0) // extra time, added to mon_scrub_interval, after which a pg that has not been scrubbed is reported; 0 disables the warning
+OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0) // extra time, added to mon_scrub_interval, after which a pg that has not been deep-scrubbed is reported; 0 disables the warning
 OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day
 OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not.
 OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time
index 664af7b03e752aa87d2a7686db944274e1da75ce..b9f20cd37b09ca356f25be94f2579f51adabcad2 100644 (file)
@@ -682,7 +682,8 @@ void MDSMonitor::on_active()
 }
 
 void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary,
-                           list<pair<health_status_t, string> > *detail) const
+                           list<pair<health_status_t, string> > *detail,
+                           CephContext* cct) const
 {
   mdsmap.get_health(summary, detail);
 
index 03a22764b7099799dddfc241f45a39b382184676..b755ba9e82c21027c52fc5a439782bca6238354b 100644 (file)
@@ -97,7 +97,8 @@ class MDSMonitor : public PaxosService {
   bool prepare_offload_targets(MonOpRequestRef op);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-                 list<pair<health_status_t,string> > *detail) const;
+                 list<pair<health_status_t,string> > *detail,
+                 CephContext *cct) const override;
   int fail_mds(std::ostream &ss, const std::string &arg);
   void fail_mds_gid(mds_gid_t gid);
 
index e54e55e76e44de9a88d2db40bd8cf933ed23ee57..23f18e168b5c49336319ba2e01ce3f35b578b977 100644 (file)
@@ -2290,7 +2290,7 @@ health_status_t Monitor::get_health(list<string>& status,
        p != paxos_service.end();
        ++p) {
     PaxosService *s = *p;
-    s->get_health(summary, detailbl ? &detail : NULL);
+    s->get_health(summary, detailbl ? &detail : NULL, cct);
   }
 
   health_monitor->get_health(f, summary, (detailbl ? &detail : NULL));
index 900c2912771c055fc8737b90b3dfd12d7a36877d..6323b85174db2efb0c6a794f0e72ebfac6940d12 100644 (file)
@@ -528,7 +528,8 @@ void MonmapMonitor::tick()
 }
 
 void MonmapMonitor::get_health(list<pair<health_status_t, string> >& summary,
-                              list<pair<health_status_t, string> > *detail) const
+                              list<pair<health_status_t, string> > *detail,
+                              CephContext *cct) const
 {
   int max = mon->monmap->size();
   int actual = mon->get_quorum().size();
index f55409217b5b7b39cbd5d43466f861b3fe930012..0b5f37cc372b11b1bd4630d2c44f4978b10c8d6e 100644 (file)
@@ -68,7 +68,8 @@ class MonmapMonitor : public PaxosService {
   bool prepare_command(MonOpRequestRef op);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-                 list<pair<health_status_t,string> > *detail) const;
+                 list<pair<health_status_t,string> > *detail,
+                 CephContext *cct) const override;
 
   int get_monmap(bufferlist &bl);
   int get_monmap(MonMap &m);
index 77e26de70a9f1b26e6c20e7cfee0a439e2b3b60e..ff0690121ae1d1e48a1f45368035df01209ba505 100644 (file)
@@ -2769,7 +2769,8 @@ void OSDMonitor::mark_all_down()
 }
 
 void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
-                           list<pair<health_status_t,string> > *detail) const
+                           list<pair<health_status_t,string> > *detail,
+                           CephContext *cct) const
 {
   int num_osds = osdmap.get_num_osds();
 
index 7638b6add95d1b5b48ab62d4eaddcda54c083afe..6c9e0678623f4bb5ad09ada0adb862b3518ac5ce 100644 (file)
@@ -399,7 +399,8 @@ private:
   int parse_osd_id(const char *s, stringstream *pss);
 
   void get_health(list<pair<health_status_t,string> >& summary,
-                 list<pair<health_status_t,string> > *detail) const;
+                 list<pair<health_status_t,string> > *detail,
+                 CephContext *cct) const override;
   bool preprocess_command(MonOpRequestRef op);
   bool prepare_command(MonOpRequestRef op);
   bool prepare_command_impl(MonOpRequestRef op, map<string,cmd_vartype>& cmdmap);
index 6cfa82ac1a1c2f5e89ef332ac168209fb6b0a159..7592551e1b7d1be4c171cb12042f03373385bad7 100644 (file)
@@ -2031,8 +2031,82 @@ int PGMonitor::_warn_slow_request_histogram(const pow2_hist_t& h, string suffix,
   return sum;
 }
 
+namespace {
+  enum class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED };
+
+  void print_unscrubbed_detailed(const std::pair<const pg_t,pg_stat_t> &pg_entry,
+                                list<pair<health_status_t,string> > *detail,
+                                scrubbed_or_deepscrubbed_t how_scrubbed) {
+
+    std::stringstream ss;
+    const auto& pg_stat(pg_entry.second);
+
+    ss << "pg " << pg_entry.first << " is not ";
+    if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) {
+      ss << "scrubbed, last_scrub_stamp "
+        << pg_stat.last_scrub_stamp;
+    } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) {
+      ss << "deep-scrubbed, last_deep_scrub_stamp "
+        << pg_stat.last_deep_scrub_stamp;
+    }
+
+    detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+  }
+
+
+  using pg_stat_map_t = const ceph::unordered_map<pg_t,pg_stat_t>;
+
+  void print_unscrubbed_pgs(pg_stat_map_t& pg_stats,
+                           list<pair<health_status_t,string> > &summary,
+                           list<pair<health_status_t,string> > *detail,
+                           const CephContext* cct) {
+    int pgs_count = 0;
+    const utime_t now = ceph_clock_now(nullptr);
+    for (const auto& pg_entry : pg_stats) {
+      const auto& pg_stat(pg_entry.second);
+      const utime_t time_since_ls = now - pg_stat.last_scrub_stamp;
+      const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp;
+
+      const int mon_warn_not_scrubbed =
+       cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval;
+
+      const int mon_warn_not_deep_scrubbed =
+       cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->mon_scrub_interval;
+
+      bool not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed &&
+                          cct->_conf->mon_warn_not_scrubbed != 0);
+
+      bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed &&
+                               cct->_conf->mon_warn_not_deep_scrubbed != 0);
+
+      if (detail != nullptr) {
+       if (not_scrubbed) {
+         print_unscrubbed_detailed(pg_entry,
+                                   detail,
+                                   scrubbed_or_deepscrubbed_t::SCRUBBED);
+       } else if (not_deep_scrubbed) {
+         print_unscrubbed_detailed(pg_entry,
+                                   detail,
+                                   scrubbed_or_deepscrubbed_t::DEEPSCRUBBED);
+       }
+      }
+      if (not_scrubbed || not_deep_scrubbed) {
+       ++pgs_count;
+      }
+    }
+
+    if (pgs_count > 0) {
+      std::stringstream ss;
+      ss << pgs_count << " unscrubbed pgs";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
+
+  }
+}
+
 void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
-                           list<pair<health_status_t,string> > *detail) const
+                          list<pair<health_status_t,string> > *detail,
+                          CephContext *cct) const
 {
   map<string,int> note;
   ceph::unordered_map<int,int>::const_iterator p = pg_map.num_pg_by_state.begin();
@@ -2310,6 +2384,9 @@ void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
       }
     }
   }
+
+  print_unscrubbed_pgs(pg_map.pg_stat, summary, detail, cct);
+
 }
 
 void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summary,
index e2d20950a564e38eb11ee4c0d8975564a121e24f..c5af197fe591f5bafb7f274f476b7745d7cf65af 100644 (file)
@@ -207,7 +207,8 @@ public:
                                   list<pair<health_status_t,string> > *detail) const;
 
   void get_health(list<pair<health_status_t,string> >& summary,
-                 list<pair<health_status_t,string> > *detail) const;
+                 list<pair<health_status_t,string> > *detail,
+                 CephContext *cct) const override;
   void check_full_osd_health(list<pair<health_status_t,string> >& summary,
                             list<pair<health_status_t,string> > *detail,
                             const set<int>& s, const char *desc, health_status_t sev) const;
index 87bf04b2b24e089385b7414009b5e79278bbbfe8..5ede654704356961989bdde5ee5308abf5910459 100644 (file)
@@ -487,7 +487,8 @@ public:
    * @param detail optional list of detailed problem reports; may be NULL
    */
   virtual void get_health(list<pair<health_status_t,string> >& summary,
-                         list<pair<health_status_t,string> > *detail) const { }
+                         list<pair<health_status_t,string> > *detail,
+                         CephContext *cct) const { } // default reports nothing; cct is the caller-supplied context that overrides may read configuration from
 
  private:
   /**