From daa0793c393a21bd2dd2ec52a0efd181e1032400 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 16 May 2017 13:20:51 -0400 Subject: [PATCH] mon/PGMonitor: move most of get_health into PGMap Leave the legacy full/nearfull stuff in PGMonitor (although we'll still need to clean out the PGMap fields too). This is cut and paste, some stripping of pg_map., g_conf -> cct->_conf, and a bit of whitespace cleanup. Signed-off-by: Sage Weil --- src/mon/PGMap.cc | 495 +++++++++++++++++++++++++++++++++++++++++++ src/mon/PGMap.h | 9 +- src/mon/PGMonitor.cc | 479 +---------------------------------------- 3 files changed, 504 insertions(+), 479 deletions(-) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 5bcecd97a67..f3559963bb6 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2297,6 +2297,501 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set& pgs) const ss << tab; } + + +// Only called with a single bit set in "what" +static void note_stuck_detail(int what, + ceph::unordered_map& stuck_pgs, + list > *detail) +{ + for (ceph::unordered_map::iterator p = stuck_pgs.begin(); + p != stuck_pgs.end(); + ++p) { + ostringstream ss; + utime_t since; + const char *whatname = 0; + switch (what) { + case PGMap::STUCK_INACTIVE: + since = p->second.last_active; + whatname = "inactive"; + break; + case PGMap::STUCK_UNCLEAN: + since = p->second.last_clean; + whatname = "unclean"; + break; + case PGMap::STUCK_DEGRADED: + since = p->second.last_undegraded; + whatname = "degraded"; + break; + case PGMap::STUCK_UNDERSIZED: + since = p->second.last_fullsized; + whatname = "undersized"; + break; + case PGMap::STUCK_STALE: + since = p->second.last_unstale; + whatname = "stale"; + break; + default: + ceph_abort(); + } + ss << "pg " << p->first << " is stuck " << whatname; + if (since == utime_t()) { + ss << " since forever"; + } else { + utime_t dur = ceph_clock_now() - since; + ss << " for " << dur; + } + ss << ", current state " << pg_state_string(p->second.state) + << ", last acting " << p->second.acting; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } +} + +static int _warn_slow_request_histogram( + CephContext *cct, + const pow2_hist_t& h, + string suffix, + list >& summary, + list > *detail) +{ + if (h.h.empty()) + return 0; + + unsigned sum = 0; + for (unsigned i = h.h.size() - 1; i > 0; --i) { + float ub = (float)(1 << i) / 1000.0; + if (ub < cct->_conf->mon_osd_max_op_age) + break; + ostringstream ss; + if (h.h[i]) { + ss << h.h[i] << " ops are blocked > " << ub << " sec" << suffix; + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + sum += h.h[i]; + } + } + return sum; +} + +namespace { + enum class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED }; + + void print_unscrubbed_detailed(const std::pair &pg_entry, + list > *detail, + scrubbed_or_deepscrubbed_t how_scrubbed) { + + std::stringstream ss; + const auto& pg_stat(pg_entry.second); + + ss << "pg " << pg_entry.first << " is not "; + if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) { + ss << "scrubbed, last_scrub_stamp " + << pg_stat.last_scrub_stamp; + } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) { + ss << "deep-scrubbed, last_deep_scrub_stamp " + << pg_stat.last_deep_scrub_stamp; + } + + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + + + using pg_stat_map_t = const ceph::unordered_map; + + void print_unscrubbed_pgs(pg_stat_map_t& pg_stats, + list > &summary, + list > *detail, + const CephContext* cct) { + if (cct->_conf->mon_warn_not_scrubbed == 0 && + 
cct->_conf->mon_warn_not_deep_scrubbed == 0) + return; + + int pgs_count = 0; + const utime_t now = ceph_clock_now(); + for (const auto& pg_entry : pg_stats) { + const auto& pg_stat(pg_entry.second); + const utime_t time_since_ls = now - pg_stat.last_scrub_stamp; + const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp; + + const int mon_warn_not_scrubbed = + cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval; + + const int mon_warn_not_deep_scrubbed = + cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->osd_deep_scrub_interval; + + bool not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed && + cct->_conf->mon_warn_not_scrubbed != 0); + + bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed && + cct->_conf->mon_warn_not_deep_scrubbed != 0); + + if (detail != nullptr) { + if (not_scrubbed) { + print_unscrubbed_detailed(pg_entry, + detail, + scrubbed_or_deepscrubbed_t::SCRUBBED); + } + if (not_deep_scrubbed) { + print_unscrubbed_detailed(pg_entry, + detail, + scrubbed_or_deepscrubbed_t::DEEPSCRUBBED); + } + } + if (not_scrubbed || not_deep_scrubbed) { + ++pgs_count; + } + } + + if (pgs_count > 0) { + std::stringstream ss; + ss << pgs_count << " unscrubbed pgs"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + } + + } +} + +void PGMap::get_health( + CephContext *cct, + const OSDMap& osdmap, + list >& summary, + list > *detail) const +{ + map note; + ceph::unordered_map::const_iterator p = num_pg_by_state.begin(); + ceph::unordered_map::const_iterator p_end = num_pg_by_state.end(); + for (; p != p_end; ++p) { + if (p->first & PG_STATE_STALE) + note["stale"] += p->second; + if (p->first & PG_STATE_DOWN) + note["down"] += p->second; + if (p->first & PG_STATE_UNDERSIZED) + note["undersized"] += p->second; + if (p->first & PG_STATE_DEGRADED) + note["degraded"] += p->second; + if (p->first & PG_STATE_INCONSISTENT) + note["inconsistent"] += p->second; + if (p->first & PG_STATE_PEERING) + note["peering"] += p->second; + if (p->first & PG_STATE_REPAIR) + note["repair"] += p->second; + if (p->first & PG_STATE_RECOVERING) + note["recovering"] += p->second; + if (p->first & PG_STATE_RECOVERY_WAIT) + note["recovery_wait"] += p->second; + if (p->first & PG_STATE_INCOMPLETE) + note["incomplete"] += p->second; + if (p->first & PG_STATE_BACKFILL_WAIT) + note["backfill_wait"] += p->second; + if (p->first & PG_STATE_BACKFILL) + note["backfilling"] += p->second; + if (p->first & PG_STATE_BACKFILL_TOOFULL) + note["backfill_toofull"] += p->second; + if (p->first & PG_STATE_RECOVERY_TOOFULL) + note["recovery_toofull"] += p->second; + } + + ceph::unordered_map stuck_pgs; + utime_t now(ceph_clock_now()); + utime_t cutoff = now - utime_t(cct->_conf->mon_pg_stuck_threshold, 0); + uint64_t num_inactive_pgs = 0; + + if (detail) { + // we need to collect details of stuck pgs, first do a quick check + // whether this will yield any results + if (get_stuck_counts(cutoff, note)) { + + // there are stuck pgs. 
gather details for specified statuses + // only if we know that there are pgs stuck in that status + + if (note.find("stuck inactive") != note.end()) { + get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs); + note["stuck inactive"] = stuck_pgs.size(); + num_inactive_pgs += stuck_pgs.size(); + note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail); + stuck_pgs.clear(); + } + + if (note.find("stuck unclean") != note.end()) { + get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs); + note["stuck unclean"] = stuck_pgs.size(); + note_stuck_detail(PGMap::STUCK_UNCLEAN, stuck_pgs, detail); + stuck_pgs.clear(); + } + + if (note.find("stuck undersized") != note.end()) { + get_stuck_stats(PGMap::STUCK_UNDERSIZED, cutoff, stuck_pgs); + note["stuck undersized"] = stuck_pgs.size(); + note_stuck_detail(PGMap::STUCK_UNDERSIZED, stuck_pgs, detail); + stuck_pgs.clear(); + } + + if (note.find("stuck degraded") != note.end()) { + get_stuck_stats(PGMap::STUCK_DEGRADED, cutoff, stuck_pgs); + note["stuck degraded"] = stuck_pgs.size(); + note_stuck_detail(PGMap::STUCK_DEGRADED, stuck_pgs, detail); + stuck_pgs.clear(); + } + + if (note.find("stuck stale") != note.end()) { + get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs); + note["stuck stale"] = stuck_pgs.size(); + num_inactive_pgs += stuck_pgs.size(); + note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail); + } + } + } else { + get_stuck_counts(cutoff, note); + map::const_iterator p = note.find("stuck inactive"); + if (p != note.end()) + num_inactive_pgs += p->second; + p = note.find("stuck stale"); + if (p != note.end()) + num_inactive_pgs += p->second; + } + + if (cct->_conf->mon_pg_min_inactive > 0 && + num_inactive_pgs >= cct->_conf->mon_pg_min_inactive) { + ostringstream ss; + ss << num_inactive_pgs << " pgs are stuck inactive for more than " << cct->_conf->mon_pg_stuck_threshold << " seconds"; + summary.push_back(make_pair(HEALTH_ERR, ss.str())); + } + + if (!note.empty()) { + for (map::iterator p = note.begin(); p != note.end(); ++p) { + ostringstream ss; + ss << p->second << " pgs " << p->first; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + } + if (detail) { + for (ceph::unordered_map::const_iterator p = pg_stat.begin(); + p != pg_stat.end(); + ++p) { + if ((p->second.state & (PG_STATE_STALE | + PG_STATE_DOWN | + PG_STATE_UNDERSIZED | + PG_STATE_DEGRADED | + PG_STATE_INCONSISTENT | + PG_STATE_PEERING | + PG_STATE_REPAIR | + PG_STATE_RECOVERING | + PG_STATE_RECOVERY_WAIT | + PG_STATE_RECOVERY_TOOFULL | + PG_STATE_INCOMPLETE | + PG_STATE_BACKFILL_WAIT | + PG_STATE_BACKFILL | + PG_STATE_BACKFILL_TOOFULL)) && + stuck_pgs.count(p->first) == 0) { + ostringstream ss; + ss << "pg " << p->first << " is " << pg_state_string(p->second.state); + ss << ", acting " << p->second.acting; + if (p->second.stats.sum.num_objects_unfound) + ss << ", " << p->second.stats.sum.num_objects_unfound << " unfound"; + if (p->second.state & PG_STATE_INCOMPLETE) { + const pg_pool_t *pi = osdmap.get_pg_pool(p->first.pool()); + if (pi && pi->min_size > 1) { + ss << " (reducing pool " << osdmap.get_pool_name(p->first.pool()) + << " min_size from " << (int)pi->min_size << " may help; search ceph.com/docs for 'incomplete')"; + } + } + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + } + } + + // slow requests + if (cct->_conf->mon_osd_max_op_age > 0 && + osd_sum.op_queue_age_hist.upper_bound() > cct->_conf->mon_osd_max_op_age) { + unsigned sum = _warn_slow_request_histogram( + cct, osd_sum.op_queue_age_hist, "", summary, NULL); + if (sum > 
0) { + ostringstream ss; + ss << sum << " requests are blocked > " << cct->_conf->mon_osd_max_op_age + << " sec"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + + if (detail) { + unsigned num_slow_osds = 0; + // do per-osd warnings + for (auto p = osd_stat.begin(); + p != osd_stat.end(); + ++p) { + if (_warn_slow_request_histogram( + cct, + p->second.op_queue_age_hist, + string(" on osd.") + stringify(p->first), + summary, detail)) + ++num_slow_osds; + } + ostringstream ss2; + ss2 << num_slow_osds << " osds have slow requests"; + summary.push_back(make_pair(HEALTH_WARN, ss2.str())); + detail->push_back(make_pair(HEALTH_WARN, ss2.str())); + } + } + } + + if (cct->_conf->mon_warn_osd_usage_min_max_delta) { + float max_osd_usage = 0.0, min_osd_usage = 1.0; + for (auto p = osd_stat.begin(); p != osd_stat.end(); ++p) { + // kb should never be 0, but avoid divide by zero in case of corruption + if (p->second.kb <= 0) + continue; + float usage = ((float)p->second.kb_used) / ((float)p->second.kb); + if (usage > max_osd_usage) + max_osd_usage = usage; + if (usage < min_osd_usage) + min_osd_usage = usage; + } + float diff = max_osd_usage - min_osd_usage; + if (diff > cct->_conf->mon_warn_osd_usage_min_max_delta) { + ostringstream ss; + ss << "difference between min (" << roundf(min_osd_usage*1000.0)/100.0 + << "%) and max (" << roundf(max_osd_usage*1000.0)/100.0 + << "%) osd usage " << roundf(diff*1000.0)/100.0 << "% > " + << roundf(cct->_conf->mon_warn_osd_usage_min_max_delta*1000.0)/100.0 + << " (mon_warn_osd_usage_min_max_delta)"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + + // recovery + list sl; + overall_recovery_summary(NULL, &sl); + for (list::iterator p = sl.begin(); p != sl.end(); ++p) { + summary.push_back(make_pair(HEALTH_WARN, "recovery " + *p)); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, "recovery " + *p)); + } + + // near-target max pools + auto& pools = osdmap.get_pools(); + for (auto p = pools.begin(); + p != pools.end(); ++p) { + if ((!p->second.target_max_objects && !p->second.target_max_bytes) || + !pg_pool_sum.count(p->first)) + continue; + bool nearfull = false; + const string& name = osdmap.get_pool_name(p->first); + const pool_stat_t& st = get_pg_pool_sum_stat(p->first); + uint64_t ratio = p->second.cache_target_full_ratio_micro + + ((1000000 - p->second.cache_target_full_ratio_micro) * + cct->_conf->mon_cache_target_full_warn_ratio); + if (p->second.target_max_objects && + (uint64_t)(st.stats.sum.num_objects - + st.stats.sum.num_objects_hit_set_archive) > + p->second.target_max_objects * (ratio / 1000000.0)) { + nearfull = true; + if (detail) { + ostringstream ss; + ss << "cache pool '" << name << "' with " + << si_t(st.stats.sum.num_objects) + << " objects at/near target max " + << si_t(p->second.target_max_objects) << " objects"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + if (p->second.target_max_bytes && + (uint64_t)(st.stats.sum.num_bytes - + st.stats.sum.num_bytes_hit_set_archive) > + p->second.target_max_bytes * (ratio / 1000000.0)) { + nearfull = true; + if (detail) { + ostringstream ss; + ss << "cache pool '" << name + << "' with " << si_t(st.stats.sum.num_bytes) + << "B at/near target max " + << si_t(p->second.target_max_bytes) << "B"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + if (nearfull) { + ostringstream ss; + ss << "'" << name << "' at/near target max"; + summary.push_back(make_pair(HEALTH_WARN, 
ss.str())); + } + } + + // scrub + if (pg_sum.stats.sum.num_scrub_errors) { + ostringstream ss; + ss << pg_sum.stats.sum.num_scrub_errors << " scrub errors"; + summary.push_back(make_pair(HEALTH_ERR, ss.str())); + if (detail) { + detail->push_back(make_pair(HEALTH_ERR, ss.str())); + } + } + + // pg skew + int num_in = osdmap.get_num_in_osds(); + int sum_pg_up = MAX(pg_sum.up, static_cast(pg_stat.size())); + if (num_in && cct->_conf->mon_pg_warn_min_per_osd > 0) { + int per = sum_pg_up / num_in; + if (per < cct->_conf->mon_pg_warn_min_per_osd && per) { + ostringstream ss; + ss << "too few PGs per OSD (" << per << " < min " << cct->_conf->mon_pg_warn_min_per_osd << ")"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + if (num_in && cct->_conf->mon_pg_warn_max_per_osd > 0) { + int per = sum_pg_up / num_in; + if (per > cct->_conf->mon_pg_warn_max_per_osd) { + ostringstream ss; + ss << "too many PGs per OSD (" << per << " > max " << cct->_conf->mon_pg_warn_max_per_osd << ")"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + if (!pg_stat.empty()) { + for (ceph::unordered_map::const_iterator p = pg_pool_sum.begin(); + p != pg_pool_sum.end(); + ++p) { + const pg_pool_t *pi = osdmap.get_pg_pool(p->first); + if (!pi) + continue; // in case osdmap changes haven't propagated to PGMap yet + const string& name = osdmap.get_pool_name(p->first); + if (pi->get_pg_num() > pi->get_pgp_num() && + !(name.find(".DELETED") != string::npos && + cct->_conf->mon_fake_pool_delete)) { + ostringstream ss; + ss << "pool " << name << " pg_num " + << pi->get_pg_num() << " > pgp_num " << pi->get_pgp_num(); + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + int average_objects_per_pg = pg_sum.stats.sum.num_objects / pg_stat.size(); + if (average_objects_per_pg > 0 && + pg_sum.stats.sum.num_objects >= cct->_conf->mon_pg_warn_min_objects && + p->second.stats.sum.num_objects >= cct->_conf->mon_pg_warn_min_pool_objects) { + int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num(); + float ratio = (float)objects_per_pg / (float)average_objects_per_pg; + if (cct->_conf->mon_pg_warn_max_object_skew > 0 && + ratio > cct->_conf->mon_pg_warn_max_object_skew) { + ostringstream ss; + ss << "pool " << name << " has many more objects per pg than average (too few pgs?)"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) { + ostringstream ss; + ss << "pool " << name << " objects per pg (" + << objects_per_pg << ") is more than " << ratio << " times cluster average (" + << average_objects_per_pg << ")"; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + } + } + } + + print_unscrubbed_pgs(pg_stat, summary, detail, cct); +} + int process_pg_map_command( const string& orig_prefix, const map& orig_cmdmap, diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index fee4313bcc4..c6fc98007ca 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -185,8 +185,8 @@ public: epoch_t last_osdmap_epoch; // last osdmap epoch i applied to the pgmap epoch_t last_pg_scan; // osdmap epoch ceph::unordered_map pg_stat; - set full_osds; - set nearfull_osds; + set full_osds; // for pre-luminous only + set nearfull_osds; // for pre-luminous only float full_ratio; float nearfull_ratio; @@ -435,6 +435,11 @@ public: return .95; } + void get_health(CephContext *cct, + const OSDMap& 
osdmap, + list >& summary, + list > *detail) const; + static void generate_test_instances(list& o); }; WRITE_CLASS_ENCODER_FEATURES(PGMap::Incremental) diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 0736af18d4b..6522361656e 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -1127,493 +1127,18 @@ update: return true; } -// Only called with a single bit set in "what" -static void note_stuck_detail(int what, - ceph::unordered_map& stuck_pgs, - list > *detail) -{ - for (ceph::unordered_map::iterator p = stuck_pgs.begin(); - p != stuck_pgs.end(); - ++p) { - ostringstream ss; - utime_t since; - const char *whatname = 0; - switch (what) { - case PGMap::STUCK_INACTIVE: - since = p->second.last_active; - whatname = "inactive"; - break; - case PGMap::STUCK_UNCLEAN: - since = p->second.last_clean; - whatname = "unclean"; - break; - case PGMap::STUCK_DEGRADED: - since = p->second.last_undegraded; - whatname = "degraded"; - break; - case PGMap::STUCK_UNDERSIZED: - since = p->second.last_fullsized; - whatname = "undersized"; - break; - case PGMap::STUCK_STALE: - since = p->second.last_unstale; - whatname = "stale"; - break; - default: - ceph_abort(); - } - ss << "pg " << p->first << " is stuck " << whatname; - if (since == utime_t()) { - ss << " since forever"; - } else { - utime_t dur = ceph_clock_now() - since; - ss << " for " << dur; - } - ss << ", current state " << pg_state_string(p->second.state) - << ", last acting " << p->second.acting; - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } -} - -int PGMonitor::_warn_slow_request_histogram(const pow2_hist_t& h, string suffix, - list >& summary, - list > *detail) const -{ - if (h.h.empty()) - return 0; - - unsigned sum = 0; - for (unsigned i = h.h.size() - 1; i > 0; --i) { - float ub = (float)(1 << i) / 1000.0; - if (ub < g_conf->mon_osd_max_op_age) - break; - ostringstream ss; - if (h.h[i]) { - ss << h.h[i] << " ops are blocked > " << ub << " sec" << suffix; - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - sum += h.h[i]; - } - } - return sum; -} - -namespace { - enum class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED }; - - void print_unscrubbed_detailed(const std::pair &pg_entry, - list > *detail, - scrubbed_or_deepscrubbed_t how_scrubbed) { - - std::stringstream ss; - const auto& pg_stat(pg_entry.second); - - ss << "pg " << pg_entry.first << " is not "; - if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) { - ss << "scrubbed, last_scrub_stamp " - << pg_stat.last_scrub_stamp; - } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) { - ss << "deep-scrubbed, last_deep_scrub_stamp " - << pg_stat.last_deep_scrub_stamp; - } - - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - - - using pg_stat_map_t = const ceph::unordered_map; - - void print_unscrubbed_pgs(pg_stat_map_t& pg_stats, - list > &summary, - list > *detail, - const CephContext* cct) { - if (cct->_conf->mon_warn_not_scrubbed == 0 && - cct->_conf->mon_warn_not_deep_scrubbed == 0) - return; - - int pgs_count = 0; - const utime_t now = ceph_clock_now(); - for (const auto& pg_entry : pg_stats) { - const auto& pg_stat(pg_entry.second); - const utime_t time_since_ls = now - pg_stat.last_scrub_stamp; - const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp; - - const int mon_warn_not_scrubbed = - cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval; - - const int mon_warn_not_deep_scrubbed = - cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->osd_deep_scrub_interval; - - bool 
not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed && - cct->_conf->mon_warn_not_scrubbed != 0); - - bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed && - cct->_conf->mon_warn_not_deep_scrubbed != 0); - - if (detail != nullptr) { - if (not_scrubbed) { - print_unscrubbed_detailed(pg_entry, - detail, - scrubbed_or_deepscrubbed_t::SCRUBBED); - } - if (not_deep_scrubbed) { - print_unscrubbed_detailed(pg_entry, - detail, - scrubbed_or_deepscrubbed_t::DEEPSCRUBBED); - } - } - if (not_scrubbed || not_deep_scrubbed) { - ++pgs_count; - } - } - - if (pgs_count > 0) { - std::stringstream ss; - ss << pgs_count << " unscrubbed pgs"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - } - - } -} - void PGMonitor::get_health(list >& summary, list > *detail, CephContext *cct) const { - map note; - ceph::unordered_map::const_iterator p = pg_map.num_pg_by_state.begin(); - ceph::unordered_map::const_iterator p_end = pg_map.num_pg_by_state.end(); - for (; p != p_end; ++p) { - if (p->first & PG_STATE_STALE) - note["stale"] += p->second; - if (p->first & PG_STATE_DOWN) - note["down"] += p->second; - if (p->first & PG_STATE_UNDERSIZED) - note["undersized"] += p->second; - if (p->first & PG_STATE_DEGRADED) - note["degraded"] += p->second; - if (p->first & PG_STATE_INCONSISTENT) - note["inconsistent"] += p->second; - if (p->first & PG_STATE_PEERING) - note["peering"] += p->second; - if (p->first & PG_STATE_REPAIR) - note["repair"] += p->second; - if (p->first & PG_STATE_RECOVERING) - note["recovering"] += p->second; - if (p->first & PG_STATE_RECOVERY_WAIT) - note["recovery_wait"] += p->second; - if (p->first & PG_STATE_INCOMPLETE) - note["incomplete"] += p->second; - if (p->first & PG_STATE_BACKFILL_WAIT) - note["backfill_wait"] += p->second; - if (p->first & PG_STATE_BACKFILL) - note["backfilling"] += p->second; - if (p->first & PG_STATE_BACKFILL_TOOFULL) - note["backfill_toofull"] += p->second; - if (p->first & PG_STATE_RECOVERY_TOOFULL) - note["recovery_toofull"] += p->second; - } - - ceph::unordered_map stuck_pgs; - utime_t now(ceph_clock_now()); - utime_t cutoff = now - utime_t(g_conf->mon_pg_stuck_threshold, 0); - uint64_t num_inactive_pgs = 0; - - if (detail) { - - // we need to collect details of stuck pgs, first do a quick check - // whether this will yield any results - if (pg_map.get_stuck_counts(cutoff, note)) { - - // there are stuck pgs. 
gather details for specified statuses - // only if we know that there are pgs stuck in that status - - if (note.find("stuck inactive") != note.end()) { - pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs); - note["stuck inactive"] = stuck_pgs.size(); - num_inactive_pgs += stuck_pgs.size(); - note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail); - stuck_pgs.clear(); - } - - if (note.find("stuck unclean") != note.end()) { - pg_map.get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs); - note["stuck unclean"] = stuck_pgs.size(); - note_stuck_detail(PGMap::STUCK_UNCLEAN, stuck_pgs, detail); - stuck_pgs.clear(); - } - - if (note.find("stuck undersized") != note.end()) { - pg_map.get_stuck_stats(PGMap::STUCK_UNDERSIZED, cutoff, stuck_pgs); - note["stuck undersized"] = stuck_pgs.size(); - note_stuck_detail(PGMap::STUCK_UNDERSIZED, stuck_pgs, detail); - stuck_pgs.clear(); - } - - if (note.find("stuck degraded") != note.end()) { - pg_map.get_stuck_stats(PGMap::STUCK_DEGRADED, cutoff, stuck_pgs); - note["stuck degraded"] = stuck_pgs.size(); - note_stuck_detail(PGMap::STUCK_DEGRADED, stuck_pgs, detail); - stuck_pgs.clear(); - } - - if (note.find("stuck stale") != note.end()) { - pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs); - note["stuck stale"] = stuck_pgs.size(); - num_inactive_pgs += stuck_pgs.size(); - note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail); - } - } - } else { - pg_map.get_stuck_counts(cutoff, note); - map::const_iterator p = note.find("stuck inactive"); - if (p != note.end()) - num_inactive_pgs += p->second; - p = note.find("stuck stale"); - if (p != note.end()) - num_inactive_pgs += p->second; - } - - if (g_conf->mon_pg_min_inactive > 0 && num_inactive_pgs >= g_conf->mon_pg_min_inactive) { - ostringstream ss; - ss << num_inactive_pgs << " pgs are stuck inactive for more than " << g_conf->mon_pg_stuck_threshold << " seconds"; - summary.push_back(make_pair(HEALTH_ERR, ss.str())); - } - - if (!note.empty()) { - for (map::iterator p = note.begin(); p != note.end(); ++p) { - ostringstream ss; - ss << p->second << " pgs " << p->first; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - } - if (detail) { - for (ceph::unordered_map::const_iterator p = pg_map.pg_stat.begin(); - p != pg_map.pg_stat.end(); - ++p) { - if ((p->second.state & (PG_STATE_STALE | - PG_STATE_DOWN | - PG_STATE_UNDERSIZED | - PG_STATE_DEGRADED | - PG_STATE_INCONSISTENT | - PG_STATE_PEERING | - PG_STATE_REPAIR | - PG_STATE_RECOVERING | - PG_STATE_RECOVERY_WAIT | - PG_STATE_RECOVERY_TOOFULL | - PG_STATE_INCOMPLETE | - PG_STATE_BACKFILL_WAIT | - PG_STATE_BACKFILL | - PG_STATE_BACKFILL_TOOFULL)) && - stuck_pgs.count(p->first) == 0) { - ostringstream ss; - ss << "pg " << p->first << " is " << pg_state_string(p->second.state); - ss << ", acting " << p->second.acting; - if (p->second.stats.sum.num_objects_unfound) - ss << ", " << p->second.stats.sum.num_objects_unfound << " unfound"; - if (p->second.state & PG_STATE_INCOMPLETE) { - const pg_pool_t *pi = mon->osdmon()->osdmap.get_pg_pool(p->first.pool()); - if (pi && pi->min_size > 1) { - ss << " (reducing pool " << mon->osdmon()->osdmap.get_pool_name(p->first.pool()) - << " min_size from " << (int)pi->min_size << " may help; search ceph.com/docs for 'incomplete')"; - } - } - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - } - } - - // slow requests - if (g_conf->mon_osd_max_op_age > 0 && - pg_map.osd_sum.op_queue_age_hist.upper_bound() > g_conf->mon_osd_max_op_age) { - unsigned sum = 
_warn_slow_request_histogram(pg_map.osd_sum.op_queue_age_hist, "", summary, NULL); - if (sum > 0) { - ostringstream ss; - ss << sum << " requests are blocked > " << g_conf->mon_osd_max_op_age << " sec"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - - if (detail) { - unsigned num_slow_osds = 0; - // do per-osd warnings - for (ceph::unordered_map::const_iterator p = pg_map.osd_stat.begin(); - p != pg_map.osd_stat.end(); - ++p) { - if (_warn_slow_request_histogram(p->second.op_queue_age_hist, - string(" on osd.") + stringify(p->first), - summary, detail)) - ++num_slow_osds; - } - ostringstream ss2; - ss2 << num_slow_osds << " osds have slow requests"; - summary.push_back(make_pair(HEALTH_WARN, ss2.str())); - detail->push_back(make_pair(HEALTH_WARN, ss2.str())); - } - } - } - - if (g_conf->mon_warn_osd_usage_min_max_delta) { - float max_osd_usage = 0.0, min_osd_usage = 1.0; - for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) { - // kb should never be 0, but avoid divide by zero in case of corruption - if (p->second.kb <= 0) - continue; - float usage = ((float)p->second.kb_used) / ((float)p->second.kb); - if (usage > max_osd_usage) - max_osd_usage = usage; - if (usage < min_osd_usage) - min_osd_usage = usage; - } - float diff = max_osd_usage - min_osd_usage; - if (diff > g_conf->mon_warn_osd_usage_min_max_delta) { - ostringstream ss; - ss << "difference between min (" << roundf(min_osd_usage*1000.0)/100.0 - << "%) and max (" << roundf(max_osd_usage*1000.0)/100.0 - << "%) osd usage " << roundf(diff*1000.0)/100.0 << "% > " - << roundf(g_conf->mon_warn_osd_usage_min_max_delta*1000.0)/100.0 - << " (mon_warn_osd_usage_min_max_delta)"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - - // recovery - list sl; - pg_map.overall_recovery_summary(NULL, &sl); - for (list::iterator p = sl.begin(); p != sl.end(); ++p) { - summary.push_back(make_pair(HEALTH_WARN, "recovery " + *p)); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, "recovery " + *p)); - } - - // full/nearfull + // legacy pre-luminous full/nearfull if (mon->osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) { check_full_osd_health(summary, detail, pg_map.full_osds, "full", HEALTH_ERR); check_full_osd_health(summary, detail, pg_map.nearfull_osds, "near full", HEALTH_WARN); } - - // near-target max pools - auto& pools = mon->osdmon()->osdmap.get_pools(); - for (auto p = pools.begin(); - p != pools.end(); ++p) { - if ((!p->second.target_max_objects && !p->second.target_max_bytes) || - !pg_map.pg_pool_sum.count(p->first)) - continue; - bool nearfull = false; - const string& name = mon->osdmon()->osdmap.get_pool_name(p->first); - const pool_stat_t& st = pg_map.get_pg_pool_sum_stat(p->first); - uint64_t ratio = p->second.cache_target_full_ratio_micro + - ((1000000 - p->second.cache_target_full_ratio_micro) * - g_conf->mon_cache_target_full_warn_ratio); - if (p->second.target_max_objects && (uint64_t)(st.stats.sum.num_objects - st.stats.sum.num_objects_hit_set_archive) > - p->second.target_max_objects * (ratio / 1000000.0)) { - nearfull = true; - if (detail) { - ostringstream ss; - ss << "cache pool '" << name << "' with " - << si_t(st.stats.sum.num_objects) - << " objects at/near target max " - << si_t(p->second.target_max_objects) << " objects"; - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - if (p->second.target_max_bytes && (uint64_t)(st.stats.sum.num_bytes - 
st.stats.sum.num_bytes_hit_set_archive) > - p->second.target_max_bytes * (ratio / 1000000.0)) { - nearfull = true; - if (detail) { - ostringstream ss; - ss << "cache pool '" << name - << "' with " << si_t(st.stats.sum.num_bytes) - << "B at/near target max " - << si_t(p->second.target_max_bytes) << "B"; - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - if (nearfull) { - ostringstream ss; - ss << "'" << name << "' at/near target max"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - - // scrub - if (pg_map.pg_sum.stats.sum.num_scrub_errors) { - ostringstream ss; - ss << pg_map.pg_sum.stats.sum.num_scrub_errors << " scrub errors"; - summary.push_back(make_pair(HEALTH_ERR, ss.str())); - if (detail) { - detail->push_back(make_pair(HEALTH_ERR, ss.str())); - } - } - - // pg skew - int num_in = mon->osdmon()->osdmap.get_num_in_osds(); - int sum_pg_up = MAX(pg_map.pg_sum.up, static_cast(pg_map.pg_stat.size())); - if (num_in && g_conf->mon_pg_warn_min_per_osd > 0) { - int per = sum_pg_up / num_in; - if (per < g_conf->mon_pg_warn_min_per_osd && per) { - ostringstream ss; - ss << "too few PGs per OSD (" << per << " < min " << g_conf->mon_pg_warn_min_per_osd << ")"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - if (num_in && g_conf->mon_pg_warn_max_per_osd > 0) { - int per = sum_pg_up / num_in; - if (per > g_conf->mon_pg_warn_max_per_osd) { - ostringstream ss; - ss << "too many PGs per OSD (" << per << " > max " << g_conf->mon_pg_warn_max_per_osd << ")"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - if (!pg_map.pg_stat.empty()) { - for (ceph::unordered_map::const_iterator p = pg_map.pg_pool_sum.begin(); - p != pg_map.pg_pool_sum.end(); - ++p) { - const pg_pool_t *pi = mon->osdmon()->osdmap.get_pg_pool(p->first); - if (!pi) - continue; // in case osdmap changes haven't propagated to PGMap yet - const string& name = mon->osdmon()->osdmap.get_pool_name(p->first); - if (pi->get_pg_num() > pi->get_pgp_num() && - !(name.find(".DELETED") != string::npos && - g_conf->mon_fake_pool_delete)) { - ostringstream ss; - ss << "pool " << name << " pg_num " - << pi->get_pg_num() << " > pgp_num " << pi->get_pgp_num(); - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size(); - if (average_objects_per_pg > 0 && - pg_map.pg_sum.stats.sum.num_objects >= g_conf->mon_pg_warn_min_objects && - p->second.stats.sum.num_objects >= g_conf->mon_pg_warn_min_pool_objects) { - int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num(); - float ratio = (float)objects_per_pg / (float)average_objects_per_pg; - if (g_conf->mon_pg_warn_max_object_skew > 0 && - ratio > g_conf->mon_pg_warn_max_object_skew) { - ostringstream ss; - ss << "pool " << name << " has many more objects per pg than average (too few pgs?)"; - summary.push_back(make_pair(HEALTH_WARN, ss.str())); - if (detail) { - ostringstream ss; - ss << "pool " << name << " objects per pg (" - << objects_per_pg << ") is more than " << ratio << " times cluster average (" - << average_objects_per_pg << ")"; - detail->push_back(make_pair(HEALTH_WARN, ss.str())); - } - } - } - } - } - - print_unscrubbed_pgs(pg_map.pg_stat, summary, detail, cct); - + pg_map.get_health(cct, mon->osdmon()->osdmap, summary, detail); 
}

void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summary,
-- 
2.39.5
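For reference, a minimal caller-side sketch (not part of the patch) of how the relocated PGMap::get_health() is consumed after this change. The wrapper name show_pg_health and the printing are illustrative assumptions; the types and the call shape follow the declaration added to src/mon/PGMap.h above, where summary is taken by reference and detail by (nullable) pointer.

// Illustrative sketch only, not part of the patch.  PGMap, OSDMap,
// CephContext and health_status_t come from the Ceph tree this patch
// modifies; show_pg_health and the printing are hypothetical.
#include <iostream>
#include <list>
#include <string>
#include <utility>
#include "mon/PGMap.h"
#include "osd/OSDMap.h"

void show_pg_health(CephContext *cct, const PGMap& pg_map, const OSDMap& osdmap)
{
  std::list<std::pair<health_status_t, std::string>> summary;
  std::list<std::pair<health_status_t, std::string>> detail;

  // After this patch the PG health checks live on PGMap itself; PGMonitor
  // keeps only the legacy pre-luminous full/nearfull checks and forwards
  // everything else here (detail may be passed as nullptr if unwanted).
  pg_map.get_health(cct, osdmap, summary, &detail);

  for (const auto& item : summary)
    std::cout << item.second << "\n";          // e.g. "12 pgs degraded"
  for (const auto& item : detail)
    std::cout << "  " << item.second << "\n";  // per-pg / per-osd detail
}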