]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon: simplify PG health checks
authorJohn Spray <john.spray@redhat.com>
Tue, 4 Jul 2017 13:52:59 +0000 (09:52 -0400)
committerSage Weil <sage@redhat.com>
Wed, 12 Jul 2017 16:52:03 +0000 (12:52 -0400)
Instead of a distinct health check for each possible
PG state, group the states into categories for availability,
degraded, damage, and report on that.

That way, while a PG/pool is suffering from one of those
bad PG states, health conditions don't keep toggling on and
off as we transition from one unavailable state to another
unavailable state.

Signed-off-by: John Spray <john.spray@redhat.com>
src/mon/PGMap.cc

index aa44d5543491b5a87668cce6899eda2278e1be52..ac42d3a747647184d1c37193224af0dc7142094e 100644 (file)
@@ -2553,207 +2553,262 @@ namespace {
   }
 }
 
-static void note_stuck_detail(
-  pg_t pgid,
-  const pg_stat_t& stat,
-  const char *what,
-  utime_t since,
-  utime_t now,
-  unsigned max,
-  list<string> *detail)
-{
-  if (detail->size() >= max) {
-    return;
-  }
-  ostringstream ss;
-  ss << "pg " << pgid << " is stuck " << what;
-  if (since == utime_t()) {
-    ss << " since forever";
-  } else {
-    utime_t dur = now - since;
-    ss << " for " << dur;
-  }
-  ss << ", current state " << pg_state_string(stat.state)
-     << ", last acting " << stat.acting;
-  detail->push_back(ss.str());
-}
-
-static void note_stuck_summary(
-  const char *what,
-  unsigned num,
-  unsigned max,
-  health_check_t *check)
-{
-  ostringstream ss;
-  ss << num << " pgs stuck " << what;
-  check->summary = ss.str();
-  if (num > max) {
-    ostringstream ss;
-    ss << "+ " << (max - num) << " more pgs";
-    check->detail.push_back(ss.str());
-  }
-}
-
 void PGMap::get_health_checks(
   CephContext *cct,
   const OSDMap& osdmap,
   health_check_map_t *checks) const
 {
-  unsigned max = cct->_conf->mon_health_max_detail;
-  auto& pools = osdmap.get_pools();
   utime_t now = ceph_clock_now();
+  const unsigned max = cct->_conf->mon_health_max_detail;
+  const auto& pools = osdmap.get_pools();
 
   checks->clear();
 
-  // PG_{STALE,DOWN,UNDERSIZED,DEGRADED,INCONSISTENT,PEERING,REPAIR,...}
-  {
-    struct info_t {
-      unsigned state;
-      health_status_t severity;
-      unsigned num = 0;
-      health_check_t *check = nullptr;
-      info_t(unsigned s, health_status_t v = HEALTH_WARN)
-       : state(s), severity(v) {}
-    };
-    vector<info_t> state_checks = {
-      { PG_STATE_DOWN },
-      { PG_STATE_UNDERSIZED },
-      { PG_STATE_DEGRADED },
-      { PG_STATE_INCONSISTENT, HEALTH_ERR },
-      { PG_STATE_REPAIR },
-      { PG_STATE_RECOVERY_WAIT },
-      { PG_STATE_INCOMPLETE, HEALTH_ERR },
-      { PG_STATE_BACKFILL_WAIT },
-      { PG_STATE_BACKFILL },
-      { PG_STATE_BACKFILL_TOOFULL },
-      { PG_STATE_RECOVERY_TOOFULL }
-    };
-    vector<info_t> active_checks;
-    for (auto& i : num_pg_by_state) {
-      for (auto& j : state_checks) {
-       if ((i.first & j.state) && !j.check) {
-         string err = string("PG_") + pg_state_string(j.state);
-         boost::to_upper(err);
-         j.check = &checks->add(err, j.severity, "");
-         active_checks.push_back(j);
-       }
-      }
+  typedef enum pg_consequence_t {
+    UNAVAILABLE = 1,   // Client IO to the pool may block
+    DEGRADED = 2,      // Fewer than the requested number of replicas are present
+    DEGRADED_FULL = 3, // Fewer than the request number of replicas may be present
+                       //  and insufficiet resources are present to fix this
+    DAMAGED = 4        // The data may be missing or inconsistent on disk and
+                       //  requires repair
+  } pg_consequence_t;
+
+  // For a given PG state, how should it be reported at the pool level?
+  class PgStateResponse {
+    public:
+    pg_consequence_t consequence;
+    typedef std::function< utime_t(const pg_stat_t&) > stuck_cb;
+    stuck_cb stuck_since;
+    bool invert;
+
+    PgStateResponse(const pg_consequence_t &c, stuck_cb s)
+      : consequence(c), stuck_since(s), invert(false)
+    {
     }
-    if (!active_checks.empty()) {
-      // scan pgs
-      for (auto& i : pg_stat) {
-       for (auto& j : active_checks) {
-         if (i.second.state & j.state) {
-           if (++j.num <= max) {
-             ostringstream ss;
-             ss << "pg " << i.first << " is "
-                << pg_state_string(i.second.state);
-             ss << ", acting " << i.second.acting;
-             if (i.second.stats.sum.num_objects_unfound)
-               ss << ", " << i.second.stats.sum.num_objects_unfound
-                  << " unfound";
-             if (j.state & PG_STATE_INCOMPLETE) {
-               const pg_pool_t *pi = osdmap.get_pg_pool(i.first.pool());
-               if (pi && pi->min_size > 1) {
-                 ss << " (reducing pool "
-                    << osdmap.get_pool_name(i.first.pool())
-                    << " min_size from " << (int)pi->min_size
-                    << " may help; search ceph.com/docs for 'incomplete')";
-               }
-             }
-             j.check->detail.push_back(ss.str());
-           }
-         }
-       }
-      }
-      for (auto& j : active_checks) {
-       ostringstream ss;
-       ss << j.num << " pgs " << pg_state_string(j.state);
-        j.check->summary = ss.str();
-        if (j.num > max) {
-         ostringstream ss;
-         ss << "+ " << (max - j.num) << " more pgs";
-         j.check->detail.push_back(ss.str());
-       }
+
+    PgStateResponse(const pg_consequence_t &c, stuck_cb s, bool i)
+      : consequence(c), stuck_since(s), invert(i)
+    {
+    }
+  };
+
+  // Record the PG state counts that contributed to a reported pool state
+  class PgCauses {
+    public:
+    // Map of PG_STATE_* to number of pgs in that state.
+    std::map<unsigned, unsigned> states;
+
+    // List of all PG IDs that had a state contributing
+    // to this health condition.
+    std::set<pg_t> pgs;
+
+    std::map<pg_t, std::string> pg_messages;
+  };
+
+  // Map of PG state to how to respond to it
+  std::map<unsigned, PgStateResponse> state_to_response = {
+    // Immediate reports
+    { PG_STATE_INCONSISTENT,     {DAMAGED,     {}} },
+    { PG_STATE_INCOMPLETE,       {DEGRADED,    {}} },
+    { PG_STATE_REPAIR,           {DAMAGED,     {}} },
+    { PG_STATE_SNAPTRIM_ERROR,   {DAMAGED,     {}} },
+    { PG_STATE_BACKFILL_TOOFULL, {DEGRADED,    {}} },
+    { PG_STATE_RECOVERY_TOOFULL, {DEGRADED,    {}} },
+    { PG_STATE_DEGRADED,         {DEGRADED,    {}} },
+    { PG_STATE_DOWN,             {UNAVAILABLE, {}} },
+    // Delayed (wait until stuck) reports
+    { PG_STATE_PEERING,          {UNAVAILABLE, [](const pg_stat_t &p){return p.last_peered;}    } },
+    { PG_STATE_UNDERSIZED,       {DEGRADED,    [](const pg_stat_t &p){return p.last_fullsized;} } },
+    { PG_STATE_STALE,            {UNAVAILABLE, [](const pg_stat_t &p){return p.last_unstale;}   } },
+    // Delayed and inverted reports
+    { PG_STATE_ACTIVE,           {UNAVAILABLE, [](const pg_stat_t &p){return p.last_active;}, true} },
+    { PG_STATE_CLEAN,            {DEGRADED,    [](const pg_stat_t &p){return p.last_clean;}, true} }
+  };
+
+  // Specialized state printer that takes account of inversion of
+  // ACTIVE, CLEAN checks.
+  auto state_name = [](const uint32_t &state) {
+    // Special cases for the states that are inverted checks
+    if (state == PG_STATE_CLEAN) {
+      return std::string("unclean");
+    } else if (state == PG_STATE_ACTIVE) {
+      return std::string("inactive");
+    } else {
+      return pg_state_string(state);
+    }
+  };
+
+  // Map of what is wrong to information about why, implicitly also stores
+  // the list of what is wrong.
+  std::map<pg_consequence_t, PgCauses> detected;
+
+  // Optimisation: trim down the number of checks to apply based on
+  // the summary counters
+  std::map<unsigned, PgStateResponse> possible_responses;
+  for (const auto &i : num_pg_by_state) {
+    for (const auto &j : state_to_response) {
+      if (!j.second.invert) {
+        // Check for normal tests by seeing if any pgs have the flag
+        if (i.first & j.first) {
+          possible_responses.insert(j);
+        }
       }
     }
   }
 
-  // PG_STUCK_{INACTIVE,UNCLEAN,UNDERSIZED,DEGRADED,STALE}
-  {
-    utime_t cutoff = now - utime_t(cct->_conf->mon_pg_stuck_threshold, 0);
-    health_check_t *inactive = 0;
-    health_check_t *unclean = 0;
-    health_check_t *degraded = 0;
-    health_check_t *undersized = 0;
-    health_check_t *stale = 0;
-    unsigned num_inactive = 0;
-    unsigned num_unclean = 0;
-    unsigned num_degraded = 0;
-    unsigned num_undersized = 0;
-    unsigned num_stale = 0;
-    for (auto& i : pg_stat) {
-      if (!(i.second.state & PG_STATE_ACTIVE) &&
-         i.second.last_active < cutoff) {
-       if (!inactive) {
-         inactive = &checks->add("PG_STUCK_INACTIVE", HEALTH_ERR, "");
-       }
-       note_stuck_detail(i.first, i.second, "inactive", i.second.last_active,
-                         now, max, &inactive->detail);
-       ++num_inactive;
+  for (const auto &j : state_to_response) {
+    if (j.second.invert) {
+      // Check for inverted tests by seeing if not-all pgs have the flag
+      const auto &found = num_pg_by_state.find(j.first);
+      if (found == num_pg_by_state.end() || found->second != num_pg) {
+        possible_responses.insert(j);
       }
-      if ((i.second.state & PG_STATE_STALE) &&
-         i.second.last_unstale < cutoff) {
-       if (!stale) {
-         stale = &checks->add("PG_STUCK_STALE", HEALTH_ERR, "");
-       }
-       note_stuck_detail(i.first, i.second, "stale", i.second.last_unstale,
-                         now, max, &stale->detail);
-       ++num_stale;
-      }
-      if (!(i.second.state & PG_STATE_CLEAN) &&
-         i.second.last_clean < cutoff) {
-       if (!unclean) {
-         unclean = &checks->add("PG_STUCK_UNCLEAN", HEALTH_WARN, "");
-       }
-       note_stuck_detail(i.first, i.second, "unclean", i.second.last_clean,
-                         now, max, &unclean->detail);
-       ++num_unclean;
-      }
-      if ((i.second.state & PG_STATE_DEGRADED) &&
-         i.second.last_undegraded < cutoff) {
-       if (!degraded) {
-         degraded = &checks->add("PG_STUCK_DEGRADED", HEALTH_WARN, "");
-       }
-       note_stuck_detail(i.first, i.second, "degraded",
-                         i.second.last_undegraded, now, max, &degraded->detail);
-       ++num_degraded;
-      }
-      if ((i.second.state & PG_STATE_UNDERSIZED) &&
-         i.second.last_fullsized < cutoff) {
-       if (!undersized) {
-         undersized = &checks->add("PG_STUCK_UNDERSIZED", HEALTH_WARN, "");
-       }
-       note_stuck_detail(i.first, i.second, "undersized",
-                         i.second.last_fullsized, now, max,
-                         &undersized->detail);
-       ++num_undersized;
-      }
-    }
-    if (inactive) {
-      note_stuck_summary("inactive", num_inactive, max, inactive);
     }
-    if (unclean) {
-      note_stuck_summary("unclean", num_unclean, max, unclean);
+  }
+
+  utime_t cutoff = now - utime_t(cct->_conf->mon_pg_stuck_threshold, 0);
+  // Loop over all PGs, if there are any possibly-unhealthy states in there
+  if (!possible_responses.empty()) {
+    for (const auto& i : pg_stat) {
+      const auto &pg_id = i.first;
+      const auto &pg_info = i.second;
+
+      for (const auto &j : state_to_response) {
+        const auto &pg_response_state = j.first;
+        const auto &pg_response = j.second;
+
+        // Apply the state test
+        if (!(bool(pg_info.state & pg_response_state) != pg_response.invert)) {
+          continue;
+        }
+
+        // Apply stuckness test if needed
+        if (pg_response.stuck_since) {
+          // Delayed response, check for stuckness
+          utime_t last_whatever = pg_response.stuck_since(pg_info);
+          if (last_whatever >= cutoff) {
+            // Not stuck enough, ignore.
+            continue;
+          } else {
+
+          }
+        }
+
+        auto &causes = detected[pg_response.consequence];
+        causes.states[pg_response_state]++;
+        causes.pgs.insert(pg_id);
+
+        // Don't bother composing detail string if we have already recorded
+        // too many
+        if (causes.pg_messages.size() > max) {
+          continue;
+        }
+
+        std::ostringstream ss;
+        if (pg_response.stuck_since) {
+          utime_t since = pg_response.stuck_since(pg_info);
+          ss << "pg " << pg_id << " is stuck " << state_name(pg_response_state);
+          if (since == utime_t()) {
+            ss << " since forever";
+          } else {
+            utime_t dur = now - since;
+            ss << " for " << dur;
+          }
+          ss << ", current state " << pg_state_string(pg_info.state)
+             << ", last acting " << pg_info.acting;
+        } else {
+          ss << "pg " << pg_id << " is "
+             << pg_state_string(pg_info.state);
+          ss << ", acting " << pg_info.acting;
+          if (pg_info.stats.sum.num_objects_unfound) {
+            ss << ", " << pg_info.stats.sum.num_objects_unfound
+               << " unfound";
+          }
+        }
+
+        if (pg_info.state & PG_STATE_INCOMPLETE) {
+          const pg_pool_t *pi = osdmap.get_pg_pool(pg_id.pool());
+          if (pi && pi->min_size > 1) {
+            ss << " (reducing pool "
+               << osdmap.get_pool_name(pg_id.pool())
+               << " min_size from " << (int)pi->min_size
+               << " may help; search ceph.com/docs for 'incomplete')";
+          }
+        }
+
+        causes.pg_messages[pg_id] = ss.str();
+      }
     }
-    if (degraded) {
-      note_stuck_summary("degraded", num_degraded, max, degraded);
+  } else {
+    dout(10) << __func__ << " skipping loop over PGs: counters look OK" << dendl;
+  }
+
+  for (const auto &i : detected) {
+    std::string health_code;
+    health_status_t sev;
+    std::string summary;
+    switch(i.first) {
+      case UNAVAILABLE:
+        health_code = "PG_AVAILABILITY";
+        sev = HEALTH_WARN;
+        summary = "Reduced data availability: ";
+        break;
+      case DEGRADED:
+        health_code = "PG_DEGRADED";
+        summary = "Degraded data redundancy: ";
+        sev = HEALTH_WARN;
+        break;
+      case DEGRADED_FULL:
+        health_code = "PG_DEGRADED_FULL";
+        summary = "Degraded data redundancy (low space): ";
+        sev = HEALTH_ERR;
+        break;
+      case DAMAGED:
+        health_code = "PG_DAMAGED";
+        summary = "Possible data damage: ";
+        sev = HEALTH_ERR;
+        break;
+      default:
+        assert(false);
+    }
+
+    if (i.first == DEGRADED) {
+      if (pg_sum.stats.sum.num_objects_degraded &&
+          pg_sum.stats.sum.num_object_copies > 0) {
+        double pc = (double)pg_sum.stats.sum.num_objects_degraded /
+          (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
+        char b[20];
+        snprintf(b, sizeof(b), "%.3lf", pc);
+        ostringstream ss;
+        ss << pg_sum.stats.sum.num_objects_degraded
+           << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded ("
+           << b << "%)";
+
+        // Throw in a comma for the benefit of the following PG counts
+        summary += ss.str() + ", ";
+      }
     }
-    if (undersized) {
-      note_stuck_summary("undersized", num_undersized, max, undersized);
+
+    // Compose summary message saying how many PGs in what states led
+    // to this health check failing
+    std::vector<std::string> pg_msgs;
+    for (const auto &j : i.second.states) {
+      std::ostringstream msg;
+      msg << j.second << (j.second > 1 ? " pgs " : " pg ") << state_name(j.first);
+      pg_msgs.push_back(msg.str());
     }
-    if (stale) {
-      note_stuck_summary("stale", num_stale, max, stale);
+    summary += joinify(pg_msgs.begin(), pg_msgs.end(), std::string(", "));
+
+
+
+    health_check_t *check = &checks->add(
+        health_code,
+        sev,
+        summary);
+
+    // Compose list of PGs contributing to this health check failing
+    for (const auto &j : i.second.pg_messages) {
+      check->detail.push_back(j.second);
     }
   }
 
@@ -3003,20 +3058,6 @@ void PGMap::get_health_checks(
     }
   }
 
-  // OBJECT_DEGRADED
-  if (pg_sum.stats.sum.num_objects_degraded &&
-      pg_sum.stats.sum.num_object_copies > 0) {
-    double pc = (double)pg_sum.stats.sum.num_objects_degraded /
-      (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
-    char b[20];
-    snprintf(b, sizeof(b), "%.3lf", pc);
-    ostringstream ss;
-    ss << pg_sum.stats.sum.num_objects_degraded
-       << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded ("
-       << b << "%)";
-    checks->add("OBJECT_DEGRADED", HEALTH_WARN, ss.str());
-  }
-
   // OBJECT_MISPLACED
   if (pg_sum.stats.sum.num_objects_misplaced &&
       pg_sum.stats.sum.num_object_copies > 0) {