From: Michal Jarzabek Date: Sat, 23 Jan 2016 12:08:57 +0000 (+0000) Subject: mon/PGMonitor.cc:warning if pg not scrubbed X-Git-Tag: v10.0.4~126^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3d8731209ad39197feedb4710e146173f2958c5d;p=ceph.git mon/PGMonitor.cc:warning if pg not scrubbed Added 2 flags for displaying a warning when a pg is unscrubbed: mon_warn_not_scrubbed and mon_warn_not_deep_scrubbed. Each of them specifies the amount of time past the mon_scrub_interval after which an unscrubbed pg is reported by: ceph -s (number of unscrubbed pgs); ceph health (number of unscrubbed pgs); ceph health detail (detailed list of unscrubbed pgs). Both flags are set to 0 by default, which means they are turned off. Fixes: #13142 Signed-off-by: Michal Jarzabek --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index dcbb7e027c2..178cec639e4 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -273,6 +273,8 @@ OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE, 60.0) OPTION(mon_data_avail_crit, OPT_INT, 5) OPTION(mon_data_avail_warn, OPT_INT, 30) OPTION(mon_data_size_warn, OPT_U64, 15*1024*1024*1024) // issue a warning when the monitor's data store goes over 15GB (in bytes) +OPTION(mon_warn_not_scrubbed, OPT_INT, 0) +OPTION(mon_warn_not_deep_scrubbed, OPT_INT, 0) OPTION(mon_scrub_interval, OPT_INT, 3600*24) // once a day OPTION(mon_scrub_timeout, OPT_INT, 60*5) // let's give it 5 minutes; why not. 
OPTION(mon_scrub_max_keys, OPT_INT, 100) // max number of keys to scrub each time diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 664af7b03e7..b9f20cd37b0 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -682,7 +682,8 @@ void MDSMonitor::on_active() } void MDSMonitor::get_health(list >& summary, - list > *detail) const + list > *detail, + CephContext* cct) const { mdsmap.get_health(summary, detail); diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index 03a22764b70..b755ba9e82c 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -97,7 +97,8 @@ class MDSMonitor : public PaxosService { bool prepare_offload_targets(MonOpRequestRef op); void get_health(list >& summary, - list > *detail) const; + list > *detail, + CephContext *cct) const override; int fail_mds(std::ostream &ss, const std::string &arg); void fail_mds_gid(mds_gid_t gid); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index e54e55e76e4..23f18e168b5 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -2290,7 +2290,7 @@ health_status_t Monitor::get_health(list& status, p != paxos_service.end(); ++p) { PaxosService *s = *p; - s->get_health(summary, detailbl ? &detail : NULL); + s->get_health(summary, detailbl ? &detail : NULL, cct); } health_monitor->get_health(f, summary, (detailbl ? 
&detail : NULL)); diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 900c2912771..6323b85174d 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -528,7 +528,8 @@ void MonmapMonitor::tick() } void MonmapMonitor::get_health(list >& summary, - list > *detail) const + list > *detail, + CephContext *cct) const { int max = mon->monmap->size(); int actual = mon->get_quorum().size(); diff --git a/src/mon/MonmapMonitor.h b/src/mon/MonmapMonitor.h index f55409217b5..0b5f37cc372 100644 --- a/src/mon/MonmapMonitor.h +++ b/src/mon/MonmapMonitor.h @@ -68,7 +68,8 @@ class MonmapMonitor : public PaxosService { bool prepare_command(MonOpRequestRef op); void get_health(list >& summary, - list > *detail) const; + list > *detail, + CephContext *cct) const override; int get_monmap(bufferlist &bl); int get_monmap(MonMap &m); diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 77e26de70a9..ff0690121ae 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2769,7 +2769,8 @@ void OSDMonitor::mark_all_down() } void OSDMonitor::get_health(list >& summary, - list > *detail) const + list > *detail, + CephContext *cct) const { int num_osds = osdmap.get_num_osds(); diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 7638b6add95..6c9e0678623 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -399,7 +399,8 @@ private: int parse_osd_id(const char *s, stringstream *pss); void get_health(list >& summary, - list > *detail) const; + list > *detail, + CephContext *cct) const override; bool preprocess_command(MonOpRequestRef op); bool prepare_command(MonOpRequestRef op); bool prepare_command_impl(MonOpRequestRef op, map& cmdmap); diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 6cfa82ac1a1..7592551e1b7 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -2031,8 +2031,82 @@ int PGMonitor::_warn_slow_request_histogram(const pow2_hist_t& h, string suffix, return sum; } +namespace { + enum 
class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED }; + + void print_unscrubbed_detailed(const std::pair &pg_entry, + list > *detail, + scrubbed_or_deepscrubbed_t how_scrubbed) { + + std::stringstream ss; + const auto& pg_stat(pg_entry.second); + + ss << "pg " << pg_entry.first << " is not "; + if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) { + ss << "scrubbed, last_scrub_stamp " + << pg_stat.last_scrub_stamp; + } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) { + ss << "deep-scrubbed, last_deep_scrub_stamp " + << pg_stat.last_deep_scrub_stamp; + } + + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + + + using pg_stat_map_t = const ceph::unordered_map; + + void print_unscrubbed_pgs(pg_stat_map_t& pg_stats, + list > &summary, + list > *detail, + const CephContext* cct) { + int pgs_count = 0; + const utime_t now = ceph_clock_now(nullptr); + for (const auto& pg_entry : pg_stats) { + const auto& pg_stat(pg_entry.second); + const utime_t time_since_ls = now - pg_stat.last_scrub_stamp; + const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp; + + const int mon_warn_not_scrubbed = + cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval; + + const int mon_warn_not_deep_scrubbed = + cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->mon_scrub_interval; + + bool not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed && + cct->_conf->mon_warn_not_scrubbed != 0); + + bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed && + cct->_conf->mon_warn_not_deep_scrubbed != 0); + + if (detail != nullptr) { + if (not_scrubbed) { + print_unscrubbed_detailed(pg_entry, + detail, + scrubbed_or_deepscrubbed_t::SCRUBBED); + } else if (not_deep_scrubbed) { + print_unscrubbed_detailed(pg_entry, + detail, + scrubbed_or_deepscrubbed_t::DEEPSCRUBBED); + } + } + if (not_scrubbed || not_deep_scrubbed) { + ++pgs_count; + } + } + + if (pgs_count > 0) { + std::stringstream ss; + ss << pgs_count << " unscrubbed 
pgs"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + } + + } +} + void PGMonitor::get_health(list >& summary, - list > *detail) const + list > *detail, + CephContext *cct) const { map note; ceph::unordered_map::const_iterator p = pg_map.num_pg_by_state.begin(); @@ -2310,6 +2384,9 @@ void PGMonitor::get_health(list >& summary, } } } + + print_unscrubbed_pgs(pg_map.pg_stat, summary, detail, cct); + } void PGMonitor::check_full_osd_health(list >& summary, diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index e2d20950a56..c5af197fe59 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -207,7 +207,8 @@ public: list > *detail) const; void get_health(list >& summary, - list > *detail) const; + list > *detail, + CephContext *cct) const override; void check_full_osd_health(list >& summary, list > *detail, const set& s, const char *desc, health_status_t sev) const; diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index 87bf04b2b24..5ede6547043 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -487,7 +487,8 @@ public: * @param detail optional list of detailed problem reports; may be NULL */ virtual void get_health(list >& summary, - list > *detail) const { } + list > *detail, + CephContext *cct) const { } private: /**