From: Sage Weil Date: Sat, 20 Feb 2021 17:26:01 +0000 (-0500) Subject: mgr/DaemonServer: include details in 'osd ok-to-stop' output X-Git-Tag: v17.1.0~2808^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=791952cc01201010f298033003ba52374cc0159f;p=ceph-ci.git mgr/DaemonServer: include details in 'osd ok-to-stop' output Include specifics about which pgs are affected, which pgs prevent us from being ok to stop, etc. The primary downside I see here is that a success and failure output will look more similar to a human user. Signed-off-by: Sage Weil --- diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 94b96c666fa..6b25b8b9bfc 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -868,14 +868,16 @@ void DaemonServer::log_access_denied( "#client-authentication"; } -int DaemonServer::_check_offlines_pgs( +void DaemonServer::_check_offlines_pgs( const set& osds, const OSDMap& osdmap, const PGMap& pgmap, - int *out_touched_pgs) + offline_pg_report *report) { - int touched_pgs = 0; - int dangerous_pgs = 0; + // reset output + *report = offline_pg_report(); + report->osds = osds; + for (const auto& q : pgmap.pg_stat) { set pg_acting; // net acting sets (with no missing if degraded) bool found = false; @@ -903,62 +905,70 @@ int DaemonServer::_check_offlines_pgs( if (!found) { continue; } - touched_pgs++; const pg_pool_t *pi = osdmap.get_pg_pool(q.first.pool()); + bool dangerous = false; if (!pi) { - ++dangerous_pgs; // pool is creating or deleting - continue; + report->bad_no_pool.insert(q.first); // pool is creating or deleting + dangerous = true; } if (!(q.second.state & PG_STATE_ACTIVE)) { - ++dangerous_pgs; - continue; + report->bad_already_inactive.insert(q.first); + dangerous = true; } if (pg_acting.size() < pi->min_size) { - ++dangerous_pgs; + report->bad_become_inactive.insert(q.first); + dangerous = true; + } + if (dangerous) { + report->not_ok.insert(q.first); + } else { + report->ok.insert(q.first); + if
(q.second.state & PG_STATE_DEGRADED) { + report->ok_become_more_degraded.insert(q.first); + } else { + report->ok_become_degraded.insert(q.first); + } } } - dout(20) << osds << " -> " << dangerous_pgs << "/" << touched_pgs - << " dangerous/touched" << dendl; - *out_touched_pgs = touched_pgs; - return dangerous_pgs; + dout(20) << osds << " -> " << report->ok.size() << " ok, " + << report->not_ok.size() << " not ok" << dendl; } -int DaemonServer::_maximize_ok_to_stop_set( +void DaemonServer::_maximize_ok_to_stop_set( const set& orig_osds, unsigned max, const OSDMap& osdmap, const PGMap& pgmap, - int *out_touched_pgs, - set *out_osds) + offline_pg_report *out_report) { dout(20) << "orig_osds " << orig_osds << " max " << max << dendl; - *out_osds = orig_osds; - int r = _check_offlines_pgs(orig_osds, osdmap, pgmap, out_touched_pgs); - if (r > 0) { - return r; + _check_offlines_pgs(orig_osds, osdmap, pgmap, out_report); + if (!out_report->ok_to_stop()) { + return; } - if (orig_osds.size() == max) { + if (orig_osds.size() >= max) { // already at max - return 0; + return; } // semi-arbitrarily start with the first osd in the set + offline_pg_report report; set osds = orig_osds; int parent = *osds.begin(); set children; while (true) { // identify the next parent - r = osdmap.crush->get_immediate_parent_id(parent, &parent); + int r = osdmap.crush->get_immediate_parent_id(parent, &parent); if (r < 0) { - return 0; // just go with what we have so far! + return; // just go with what we have so far! } // get candidate additions that are beneath this point in the tree children.clear(); r = osdmap.crush->get_all_children(parent, &children); if (r < 0) { - return 0; // just go with what we have so far! + return; // just go with what we have so far! 
} dout(20) << " parent " << parent << " children " << children << dendl; @@ -967,18 +977,16 @@ int DaemonServer::_maximize_ok_to_stop_set( for (auto o : children) { if (o >= 0 && osdmap.is_up(o) && osds.count(o) == 0) { osds.insert(o); - int touched; - r = _check_offlines_pgs(osds, osdmap, pgmap, &touched); - if (r > 0) { + _check_offlines_pgs(osds, osdmap, pgmap, &report); + if (!report.ok_to_stop()) { osds.erase(o); ++failed; continue; } - *out_osds = osds; - *out_touched_pgs = touched; + *out_report = report; if (osds.size() == max) { dout(20) << " hit max" << dendl; - return 0; // yay, we hit the max + return; // yay, we hit the max } } } @@ -986,7 +994,7 @@ int DaemonServer::_maximize_ok_to_stop_set( if (failed) { // we hit some failures; go with what we have dout(20) << " hit some peer failures" << dendl; - return 0; + return; } } } @@ -1719,9 +1727,6 @@ bool DaemonServer::_handle_command( set osds; int64_t max = 1; cmd_getval(cmdctx->cmdmap, "max", max); - if (max < (int)osds.size()) { - max = osds.size(); - } int r; cluster_state.with_osdmap([&](const OSDMap& osdmap) { r = osdmap.parse_osd_id_list(ids, &osds, &ss); @@ -1730,13 +1735,14 @@ bool DaemonServer::_handle_command( ss << "must specify one or more OSDs"; r = -EINVAL; } + if (max < (int)osds.size()) { + max = osds.size(); + } if (r < 0) { cmdctx->reply(r, ss); return true; } - set out_osds; - int touched_pgs = 0; - int dangerous_pgs = 0; + offline_pg_report out_report; cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { if (pg_map.num_pg_unknown > 0) { ss << pg_map.num_pg_unknown << " pgs have unknown state; " @@ -1744,36 +1750,26 @@ bool DaemonServer::_handle_command( r = -EAGAIN; return; } - dangerous_pgs = _maximize_ok_to_stop_set( + _maximize_ok_to_stop_set( osds, max, osdmap, pg_map, - &touched_pgs, &out_osds); + &out_report); }); if (r < 0) { cmdctx->reply(r, ss); return true; } - if (dangerous_pgs) { - ss << dangerous_pgs << " PGs are already too degraded, 
would become" - << " too degraded or might become unavailable"; - cmdctx->reply(-EBUSY, ss); - return true; - } - ss << "These OSD(s) are ok to stop without reducing" - << " availability or risking data, provided there are no other concurrent failures" - << " or interventions." << std::endl; - ss << touched_pgs << " PGs are likely to be" - << " degraded (but remain available) as a result."; if (!f) { f.reset(Formatter::create("json")); } - f->open_array_section("osds"); - for (auto o : out_osds) { - f->dump_int("osd", o); - } - f->close_section(); + f->dump_object("ok_to_stop", out_report); f->flush(cmdctx->odata); cmdctx->odata.append("\n"); - cmdctx->reply(0, ss); + if (!out_report.ok_to_stop()) { + ss << "unsafe to stop osd(s)"; + cmdctx->reply(-EBUSY, ss); + } else { + cmdctx->reply(0, ss); + } return true; } else if (prefix == "pg force-recovery" || prefix == "pg force-backfill" || diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index 0be7d19019e..9119c6743f8 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -45,6 +45,69 @@ class CommandContext; struct OSDPerfMetricQuery; struct MDSPerfMetricQuery; + +struct offline_pg_report { + set osds; + set ok, not_ok; + set ok_become_degraded, ok_become_more_degraded; // ok + set bad_no_pool, bad_already_inactive, bad_become_inactive; // not ok + + bool ok_to_stop() const { + return not_ok.empty(); + } + + void dump(Formatter *f) const { + f->dump_bool("ok_to_stop", ok_to_stop()); + f->open_array_section("osds"); + for (auto o : osds) { + f->dump_int("osd", o); + } + f->close_section(); + f->dump_unsigned("num_ok_pgs", ok.size()); + f->dump_unsigned("num_not_ok_pgs", not_ok.size()); + + // bad news + if (!bad_no_pool.empty()) { + f->open_array_section("bad_no_pool_pgs"); + for (auto pg : bad_no_pool) { + f->dump_stream("pg") << pg; + } + f->close_section(); + } + if (!bad_already_inactive.empty()) { + f->open_array_section("bad_already_inactive"); + for (auto pg : bad_already_inactive) { + 
f->dump_stream("pg") << pg; + } + f->close_section(); + } + if (!bad_become_inactive.empty()) { + f->open_array_section("bad_become_inactive"); + for (auto pg : bad_become_inactive) { + f->dump_stream("pg") << pg; + } + f->close_section(); + } + + // informative + if (!ok_become_degraded.empty()) { + f->open_array_section("ok_become_degraded"); + for (auto pg : ok_become_degraded) { + f->dump_stream("pg") << pg; + } + f->close_section(); + } + if (!ok_become_more_degraded.empty()) { + f->open_array_section("ok_become_more_degraded"); + for (auto pg : ok_become_more_degraded) { + f->dump_stream("pg") << pg; + } + f->close_section(); + } + } +}; + + /** * Server used in ceph-mgr to communicate with Ceph daemons like * MDSs and OSDs. @@ -99,18 +162,17 @@ private: void _prune_pending_service_map(); - int _check_offlines_pgs( + void _check_offlines_pgs( const set& osds, const OSDMap& osdmap, const PGMap& pgmap, - int *out_touched_pgs); - int _maximize_ok_to_stop_set( + offline_pg_report *report); + void _maximize_ok_to_stop_set( const set& orig_osds, unsigned max, const OSDMap& osdmap, const PGMap& pgmap, - int *out_touched_pgs, - set *out_osds); + offline_pg_report *report); utime_t started_at; std::atomic pgmap_ready;