From a9addc61f51dc483c273e995d51a99ce33094270 Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Sat, 16 Jan 2016 13:00:26 +0100 Subject: [PATCH] mon: Go into ERR state if multiple PGs are stuck inactive If >=X PGs are stuck inactive longer than 'mon_pg_stuck_threshold' we go into ERR state. This is useful for situations where one or more PGs stay stuck in peering or undersized state due to an OSD failure. RBD volumes can become fully unresponsive if one or more PGs are inactive. Fixes: #13923 Signed-off-by: Wido den Hollander --- src/common/config_opts.h | 1 + src/mon/PGMonitor.cc | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 8542162fb09b..657ad6c61cc8 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -236,6 +236,7 @@ OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for cl OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds) OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) +OPTION(mon_pg_min_inactive, OPT_U64, 1) // the number of PGs which have to be inactive longer than 'mon_pg_stuck_threshold' before health goes into ERR. 0 means disabled, never go into ERR. 
OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin OPTION(mon_pg_warn_max_per_osd, OPT_INT, 300) // max # pgs per (in) osd before we warn the admin OPTION(mon_pg_warn_max_object_skew, OPT_FLOAT, 10.0) // max skew few average in objects per pg diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 6cfa82ac1a1c..9db9977a2dcd 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -2071,10 +2071,12 @@ void PGMonitor::get_health(list >& summary, ceph::unordered_map stuck_pgs; utime_t now(ceph_clock_now(g_ceph_context)); utime_t cutoff = now - utime_t(g_conf->mon_pg_stuck_threshold, 0); + uint64_t num_inactive_pgs = 0; pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs); if (!stuck_pgs.empty()) { note["stuck inactive"] = stuck_pgs.size(); + num_inactive_pgs += stuck_pgs.size(); if (detail) note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail); } @@ -2107,10 +2109,17 @@ void PGMonitor::get_health(list >& summary, pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs); if (!stuck_pgs.empty()) { note["stuck stale"] = stuck_pgs.size(); + num_inactive_pgs += stuck_pgs.size(); if (detail) note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail); } + if (g_conf->mon_pg_min_inactive > 0 && num_inactive_pgs >= g_conf->mon_pg_min_inactive) { + ostringstream ss; + ss << num_inactive_pgs << " pgs are stuck inactive for more than " << g_conf->mon_pg_stuck_threshold << " seconds"; + summary.push_back(make_pair(HEALTH_ERR, ss.str())); + } + if (!note.empty()) { for (map::iterator p = note.begin(); p != note.end(); ++p) { ostringstream ss; -- 2.47.3