From: David Zafman Date: Fri, 14 Apr 2017 03:42:55 +0000 (-0700) Subject: mon, osd: Add detailed full information for now in the mon X-Git-Tag: v12.0.2~51^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=252230786557353f9d52cf633b6400097f9daba3;p=ceph.git mon, osd: Add detailed full information for now in the mon Show ceph health doc output in the correct order Signed-off-by: David Zafman --- diff --git a/doc/rados/troubleshooting/troubleshooting-osd.rst b/doc/rados/troubleshooting/troubleshooting-osd.rst index 651907dfb058..fe29f4767f94 100644 --- a/doc/rados/troubleshooting/troubleshooting-osd.rst +++ b/doc/rados/troubleshooting/troubleshooting-osd.rst @@ -222,16 +222,15 @@ lowering the ``mon osd full ratio``, ``mon osd backfillfull ratio`` and Full ``ceph-osds`` will be reported by ``ceph health``:: ceph health - HEALTH_WARN 1 nearfull osds - osd.2 is near full at 85% + HEALTH_WARN 1 nearfull osd(s) Or:: - ceph health - HEALTH_ERR 1 nearfull osds, 1 backfillfull osds, 1 full osds - osd.2 is near full at 85% + ceph health detail + HEALTH_ERR 1 full osd(s); 1 backfillfull osd(s); 1 nearfull osd(s) osd.3 is full at 97% osd.4 is backfill full at 91% + osd.2 is near full at 87% The best way to deal with a full cluster is to add new ``ceph-osds``, allowing the cluster to redistribute data to the newly available storage. 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 680e9e3a6fe3..0b059271fbb3 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -3402,23 +3402,40 @@ void OSDMonitor::get_health(list >& summary, summary.push_back(make_pair(HEALTH_ERR, ss.str())); } - int full, backfill, nearfull; - osdmap.count_full_nearfull_osds(&full, &backfill, &nearfull); - if (full > 0) { + map<int, float> full, backfillfull, nearfull; + osdmap.get_full_osd_util(mon->pgmon()->pg_map.osd_stat, &full, &backfillfull, &nearfull); + if (full.size()) { ostringstream ss; - ss << full << " full osd(s)"; + ss << full.size() << " full osd(s)"; summary.push_back(make_pair(HEALTH_ERR, ss.str())); } - if (backfill > 0) { + if (backfillfull.size()) { ostringstream ss; - ss << backfill << " backfillfull osd(s)"; + ss << backfillfull.size() << " backfillfull osd(s)"; summary.push_back(make_pair(HEALTH_WARN, ss.str())); } - if (nearfull > 0) { + if (nearfull.size()) { ostringstream ss; - ss << nearfull << " nearfull osd(s)"; + ss << nearfull.size() << " nearfull osd(s)"; summary.push_back(make_pair(HEALTH_WARN, ss.str())); } + if (detail) { + for (auto& i: full) { + ostringstream ss; + ss << "osd." << i.first << " is full at " << roundf(i.second * 100) << "%"; + detail->push_back(make_pair(HEALTH_ERR, ss.str())); + } + for (auto& i: backfillfull) { + ostringstream ss; + ss << "osd." << i.first << " is backfill full at " << roundf(i.second * 100) << "%"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + for (auto& i: nearfull) { + ostringstream ss; + ss << "osd." 
<< i.first << " is near full at " << roundf(i.second * 100) << "%"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } } // note: we leave it to ceph-mgr to generate details health warnings // with actual osd utilizations diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 6d0cbfe0a283..5035fab931a8 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1046,6 +1046,40 @@ void OSDMap::count_full_nearfull_osds(int *full, int *backfill, int *nearfull) c } } +static bool get_osd_utilization(const ceph::unordered_map &osd_stat, + int id, int64_t* kb, int64_t* kb_used, int64_t* kb_avail) { + auto p = osd_stat.find(id); + if (p == osd_stat.end()) + return false; + *kb = p->second.kb; + *kb_used = p->second.kb_used; + *kb_avail = p->second.kb_avail; + return *kb > 0; +} + +void OSDMap::get_full_osd_util(const ceph::unordered_map &osd_stat, + map<int, float> *full, map<int, float> *backfill, map<int, float> *nearfull) const +{ + full->clear(); + backfill->clear(); + nearfull->clear(); + for (int i = 0; i < max_osd; ++i) { + if (exists(i) && is_up(i) && is_in(i)) { + int64_t kb, kb_used, kb_avail; + if (osd_state[i] & CEPH_OSD_FULL) { + if (get_osd_utilization(osd_stat, i, &kb, &kb_used, &kb_avail)) + full->emplace(i, (float)kb_used / (float)kb); + } else if (osd_state[i] & CEPH_OSD_BACKFILLFULL) { + if (get_osd_utilization(osd_stat, i, &kb, &kb_used, &kb_avail)) + backfill->emplace(i, (float)kb_used / (float)kb); + } else if (osd_state[i] & CEPH_OSD_NEARFULL) { + if (get_osd_utilization(osd_stat, i, &kb, &kb_used, &kb_avail)) + nearfull->emplace(i, (float)kb_used / (float)kb); + } + } + } +} + void OSDMap::get_all_osds(set& ls) const { for (int i=0; i &osd_stat, + map<int, float> *full, map<int, float> *backfill, map<int, float> *nearfull) const; /***** cluster state *****/ /* osds */