From 4f0666085e2fd553ccc05cef8510aae820849aca Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Mon, 16 Jun 2014 16:27:05 -0700
Subject: [PATCH] mon/OSDMonitor: make down osd count sensible

We currently log something like

 1/10 in osds are down

in the health warning when there are down OSDs, but this is based on a
comparison of the number of up vs the number of in osds, and makes no sense
when there are up osds that are not in.

Instead, count only the number OSDs that are both down and in (relative to
the total number of OSDs in) and warn about that.  This means that, if a
disk fails, and we mark it out, and the cluster fully repairs itself, it
will go back to a HEALTH_OK state.

I think that is a good thing, and certainly preferable to the current
nonsense.  If we want to distinguish between down+out OSDs that were failed
vs those that have been "acknowledged" by an admin to be dead, we will
need to add some additional state (possibly reusing the AUTOOUT flag?), but
that will require more discussion.

Backport: firefly (maybe)
Signed-off-by: Sage Weil <sage@inktank.com>
(cherry picked from commit 55a97787088b79356c678a909b2410b3924e7f5b)
---
 src/mon/OSDMonitor.cc | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index c949a8b4f22a7..5f7752827c1a5 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2007,29 +2007,33 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
 			    list<pair<health_status_t,string> > *detail) const
 {
   int num_osds = osdmap.get_num_osds();
-  int num_up_osds = osdmap.get_num_up_osds();
-  int num_in_osds = osdmap.get_num_in_osds();
 
   if (num_osds == 0) {
     summary.push_back(make_pair(HEALTH_ERR, "no osds"));
   } else {
-    if (num_up_osds < num_in_osds) {
-      ostringstream ss;
-      ss << (num_in_osds - num_up_osds) << "/" << num_in_osds << " in osds are down";
-      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
-
-      if (detail) {
-	for (int i = 0; i < osdmap.get_max_osd(); i++) {
-	  if (osdmap.exists(i) && !osdmap.is_up(i)) {
-	    const osd_info_t& info = osdmap.get_info(i);
-	    ostringstream ss;
-	    ss << "osd." << i << " is down since epoch " << info.down_at
-	       << ", last address " << osdmap.get_addr(i);
-	    detail->push_back(make_pair(HEALTH_WARN, ss.str()));
-	  }
+    int num_in_osds = 0;
+    int num_down_in_osds = 0;
+    for (int i = 0; i < osdmap.get_max_osd(); i++) {
+      if (!osdmap.exists(i) || osdmap.is_out(i))
+	continue;
+      ++num_in_osds;
+      if (!osdmap.is_up(i)) {
+	++num_down_in_osds;
+	if (detail) {
+	  const osd_info_t& info = osdmap.get_info(i);
+	  ostringstream ss;
+	  ss << "osd." << i << " is down since epoch " << info.down_at
+	     << ", last address " << osdmap.get_addr(i);
+	  detail->push_back(make_pair(HEALTH_WARN, ss.str()));
 	}
       }
     }
+    assert(num_down_in_osds <= num_in_osds);
+    if (num_down_in_osds > 0) {
+      ostringstream ss;
+      ss << num_down_in_osds << "/" << num_in_osds << " in osds are down";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
 
     // warn about flags
     if (osdmap.test_flag(CEPH_OSDMAP_PAUSERD |
-- 
2.39.5