From: Sage Weil Date: Thu, 15 Jun 2017 02:23:42 +0000 (-0400) Subject: mon/MDSMonitor: implement new-style cephfs health checks X-Git-Tag: v12.1.1~58^2~36 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=69ebbebb955d082dddc821e3270715a3ce61df84;p=ceph.git mon/MDSMonitor: implement new-style cephfs health checks Our detail elements are still strings, so we keep the bit that collapses the metadata into a string and appends it to the string. Each MDS-generated item becomes a detail record. Health checks are consolidated either by MDS_ or FS_, counting the number of mds servers or file systems affected. Signed-off-by: Sage Weil --- diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index eb08d02b7dcb..cdce14b60892 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -18,6 +18,8 @@ #include using std::stringstream; +#include "mon/health_check.h" + void Filesystem::dump(Formatter *f) const {
@@ -327,6 +329,45 @@ bool FSMap::check_health(void) return changed; }
+/**
+ * Gather new-style health checks for every filesystem in this map.
+ *
+ * Each filesystem's MDSMap checks are merged into *checks.  Afterwards a
+ * single MDS_INSUFFICIENT_STANDBY warning is added if the largest value
+ * returned by any filesystem's get_standby_count_wanted() is non-zero.
+ *
+ * @param checks  health check map that receives the results
+ */
+void FSMap::get_health_checks(health_check_map_t *checks) const
+{
+  const auto standbys_available = standby_daemons.size();
+
+  mds_rank_t wanted_max = 0;
+  for (const auto &entry : filesystems) {
+    const auto &filesystem = entry.second;
+
+    health_check_map_t per_fs_checks;
+    filesystem->mds_map.get_health_checks(&per_fs_checks);
+    checks->merge(per_fs_checks);
+
+    const auto wanted = filesystem->mds_map.get_standby_count_wanted(
+      static_cast<mds_rank_t>(standbys_available));
+    wanted_max = std::max(wanted_max, wanted);
+  }
+
+  if (!wanted_max) {
+    return;
+  }
+
+  // MDS_INSUFFICIENT_STANDBY
+  auto &check = checks->add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN,
+                            "insufficient standby daemons available");
+  std::ostringstream detail;
+  detail << "have " << standbys_available << "; want " << wanted_max
+         << " more";
+  check.detail.push_back(detail.str());
+}
+
 void FSMap::encode(bufferlist& bl, uint64_t features) const { if (features & CEPH_FEATURE_SERVER_JEWEL) { diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 3d389c48885b..ea102a712740 100644 --- a/src/mds/FSMap.h +++
b/src/mds/FSMap.h @@ -35,6 +35,7 @@ #include "mds/mdstypes.h" class CephContext; +class health_check_map_t; #define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20") #define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges") @@ -476,6 +477,8 @@ public: void get_health(list >& summary, list > *detail) const; + void get_health_checks(health_check_map_t *checks) const; + bool check_health(void); /** diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index b397eb089e9f..bd54469756f4 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -18,6 +18,8 @@ #include using std::stringstream; +#include "mon/health_check.h" + // features CompatSet get_mdsmap_compat_set_all() {
@@ -404,6 +406,101 @@ void MDSMap::get_health(list >& summary, } }
+/**
+ * Append new-style health checks for this filesystem's MDS ranks.
+ *
+ * Summaries carry %num%, %plurals% and %isorare% placeholders; every
+ * affected filesystem or daemon contributes one detail string.
+ *
+ *  - FS_WITH_FAILED_MDS, MDS_FAILED: `failed` is non-empty
+ *  - MDS_DAMAGED:                    `damaged` is non-empty
+ *  - FS_DEGRADED, MDS_DEGRADED:      is_degraded() returns true
+ *
+ * @param checks  health check map that receives the results
+ */
+void MDSMap::get_health_checks(health_check_map_t *checks) const
+{
+  // FS_WITH_FAILED_MDS
+  // MDS_FAILED
+  if (!failed.empty()) {
+    // Worded without %isorare%, which would expand to "is have"/"are have".
+    health_check_t& fscheck = checks->add(
+      "FS_WITH_FAILED_MDS", HEALTH_WARN,
+      "%num% filesystem%plurals% with a failed mds daemon");
+    std::ostringstream ss;
+    ss << "fs " << fs_name << " has " << failed.size() << " failed mds"
+       << (failed.size() > 1 ? "s" : "");
+    fscheck.detail.push_back(ss.str());
+
+    health_check_t& check = checks->add("MDS_FAILED", HEALTH_ERR,
+                                        "%num% mds daemon%plurals% down");
+    for (const auto &rank : failed) {
+      std::ostringstream oss;
+      oss << "fs " << fs_name << " mds." << rank << " has failed";
+      check.detail.push_back(oss.str());
+    }
+  }
+
+  // MDS_DAMAGED
+  if (!damaged.empty()) {
+    health_check_t& check = checks->add("MDS_DAMAGED", HEALTH_ERR,
+                                        "%num% mds daemon%plurals% damaged");
+    for (const auto &rank : damaged) {
+      std::ostringstream oss;
+      oss << "fs " << fs_name << " mds." << rank << " is damaged";
+      check.detail.push_back(oss.str());
+    }
+  }
+
+  // FS_DEGRADED
+  // MDS_DEGRADED
+  if (is_degraded()) {
+    health_check_t& fscheck = checks->add(
+      "FS_DEGRADED", HEALTH_WARN,
+      "%num% filesystem%plurals% %isorare% degraded");
+    std::ostringstream ss;
+    ss << "fs " << fs_name << " is degraded";
+    fscheck.detail.push_back(ss.str());
+
+    std::list<std::string> detail;
+    for (mds_rank_t i = mds_rank_t(0); i < get_max_mds(); i++) {
+      if (!is_up(i))
+        continue;
+
+      // Report only ranks in one of the states below; a rank that is up
+      // but in none of them must not be counted as degraded.
+      std::string state;
+      if (is_resolve(i))
+        state += " is resolving";
+      if (is_replay(i))
+        state += " is replaying journal";
+      if (is_rejoin(i))
+        state += " is rejoining";
+      if (is_reconnect(i))
+        state += " is reconnecting to clients";
+      if (state.empty())
+        continue;
+
+      const mds_gid_t gid = up.find(i)->second;
+      const auto info = mds_info.find(gid);
+      if (info == mds_info.end())
+        continue;  // no info record for this gid; nothing to describe
+
+      std::ostringstream rank_ss;
+      rank_ss << "fs " << fs_name << " mds." << info->second.name << " at "
+              << info->second.addr << " rank " << i << state;
+      detail.push_back(rank_ss.str());
+    }
+    if (!detail.empty()) {
+      health_check_t& check = checks->add(
+        "MDS_DEGRADED", HEALTH_WARN,
+        "%num% mds daemon%plurals% %isorare% degraded");
+      check.detail.insert(check.detail.end(), detail.begin(), detail.end());
+    }
+  }
+}
+
 void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const { ENCODE_START(7, 4, bl); diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index d94204715192..e6423c9bea1f 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -59,6 +59,7 @@ */ class CephContext; +class health_check_map_t; extern CompatSet get_mdsmap_compat_set_all(); extern CompatSet get_mdsmap_compat_set_default(); @@ -462,6 +463,8 @@ public: void get_health(list >& summary, list > *detail) const; + void get_health_checks(health_check_map_t *checks) const; + typedef enum { AVAILABLE = 0, diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index 31febe50a353..a83502e85c44 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -43,6 +43,56 @@ enum mds_metric_t { MDS_HEALTH_CACHE_OVERSIZED };
+// Health check code (e.g. "MDS_TRIM") reported for an MDS health metric.
+// Metric types without a case below yield "???".
+static inline const char *mds_metric_name(mds_metric_t m)
+{
+  const char *name = "???";
+  switch (m) {
+  case MDS_HEALTH_TRIM: name = "MDS_TRIM"; break;
+  case MDS_HEALTH_CLIENT_RECALL: name = "MDS_CLIENT_RECALL"; break;
+  case MDS_HEALTH_CLIENT_LATE_RELEASE: name = "MDS_CLIENT_LATE_RELEASE"; break;
+  case MDS_HEALTH_CLIENT_RECALL_MANY: name = "MDS_CLIENT_RECALL_MANY"; break;
+  case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY: name = "MDS_CLIENT_LATE_RELEASE_MANY"; break;
+  case MDS_HEALTH_CLIENT_OLDEST_TID: name = "MDS_CLIENT_OLDEST_TID"; break;
+  case MDS_HEALTH_CLIENT_OLDEST_TID_MANY: name = "MDS_CLIENT_OLDEST_TID_MANY"; break;
+  case MDS_HEALTH_DAMAGE: name = "MDS_DAMAGE"; break;
+  case MDS_HEALTH_READ_ONLY: name = "MDS_READ_ONLY"; break;
+  case MDS_HEALTH_SLOW_REQUEST: name = "MDS_SLOW_REQUEST"; break;
+  case MDS_HEALTH_CACHE_OVERSIZED: name = "MDS_CACHE_OVERSIZED"; break;
+  default: break;
+  }
+  return name;
+}
+
+// Summary template for an MDS health metric; "%num%" is left in place as a
+// placeholder.  Metric types without a case below yield "???".
+static inline const char *mds_metric_summary(mds_metric_t m)
+{
+  switch (m) {
+  case MDS_HEALTH_TRIM: return "%num% MDSs behind on trimming";
+  case MDS_HEALTH_CLIENT_RECALL:
+    return "%num% clients failing to respond to cache pressure";
+  case MDS_HEALTH_CLIENT_LATE_RELEASE:
+    return "%num% clients failing to respond to capability release";
+  case MDS_HEALTH_CLIENT_RECALL_MANY:
+    return "%num% MDSs have many clients failing to respond to cache pressure";
+  case MDS_HEALTH_CLIENT_LATE_RELEASE_MANY:
+    return "%num% MDSs have many clients failing to respond to capability "
+      "release";
+  case MDS_HEALTH_CLIENT_OLDEST_TID:
+    return "%num% clients failing to advance oldest client/flush tid";
+  case MDS_HEALTH_CLIENT_OLDEST_TID_MANY:
+    return "%num% MDSs have clients failing to advance oldest client/flush tid";
+  case MDS_HEALTH_DAMAGE: return "%num% MDSs report damaged metadata";
+  case MDS_HEALTH_READ_ONLY: return "%num% MDSs are read only";
+  case MDS_HEALTH_SLOW_REQUEST: return "%num% MDSs report slow requests";
+  case MDS_HEALTH_CACHE_OVERSIZED: return "%num% MDSs report oversized cache";
+  default: break;
+  }
+  return "???";
+}
+
 /** * This structure is
designed to allow some flexibility in how we emit health * complaints, such that: diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 608e1aeedc3e..2d7e309023ed 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -14,6 +14,7 @@ #include #include +#include #include "MDSMonitor.h" #include "FSCommands.h" @@ -99,6 +100,8 @@ void MDSMonitor::update_from_paxos(bool *need_bootstrap) << ", my e " << fsmap.epoch << dendl; assert(version > fsmap.epoch); + load_health(); + // read and decode bufferlist fsmap_bl; fsmap_bl.clear(); @@ -174,6 +177,65 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) } pending_daemon_health_rm.clear(); remove_from_metadata(t); + + // health + health_check_map_t new_checks; + const auto info_map = pending_fsmap.get_mds_info(); + for (const auto &i : info_map) { + const auto &gid = i.first; + const auto &info = i.second; + if (pending_daemon_health_rm.count(gid)) { + continue; + } + MDSHealth health; + auto p = pending_daemon_health.find(gid); + if (p != pending_daemon_health.end()) { + health = p->second; + } else { + bufferlist bl; + mon->store->get(MDS_HEALTH_PREFIX, stringify(gid), bl); + if (!bl.length()) { + derr << "Missing health data for MDS " << gid << dendl; + continue; + } + bufferlist::iterator bl_i = bl.begin(); + health.decode(bl_i); + } + for (const auto &metric : health.metrics) { + int const rank = info.rank; + health_check_t *check = &new_checks.add( + mds_metric_name(metric.type), + metric.sev, + mds_metric_summary(metric.type)); + ostringstream ss; + ss << "mds" << info.name << "(mds." 
<< rank << "): " << metric.message; + for (auto p = metric.metadata.begin(); + p != metric.metadata.end(); + ++p) { + if (p != metric.metadata.begin()) { + ss << ", "; + } + ss << p->first << ": " << p->second; + } + check->detail.push_back(ss.str()); + } + } + pending_fsmap.get_health_checks(&new_checks); + for (auto& p : new_checks.checks) { + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%num%"), + stringify(p.second.detail.size())); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%plurals%"), + p.second.detail.size() > 1 ? "s" : ""); + p.second.summary = boost::regex_replace( + p.second.summary, + boost::regex("%isorare%"), + p.second.detail.size() > 1 ? "are" : "is"); + } + encode_health(new_checks, t); } version_t MDSMonitor::get_trim_to()