From 957528b0dfcaf84119f848a1f7dfb8b1a01ad985 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 4 Mar 2021 08:35:24 -0500 Subject: [PATCH] mgr/DaemonServer: osd ok-to-stop: return json when there are unknown PGs In 791952cc01201010f298033003ba52374cc0159f we switched to return JSON both on success and failure to describe which PGs are affected or are blocking the ability to stop/restart OSDs. Do the same for the case where some PG states are unknown (i.e., just after a mgr restart) so that the cephadm upgrade process can unconditionally expect a JSON result. Signed-off-by: Sage Weil (cherry picked from commit 2cce16537c9f9e92bf0a8fce746d0364cbb2d15d) --- src/mgr/DaemonServer.cc | 23 ++++++++++++----------- src/mgr/DaemonServer.h | 13 +++++++++++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index be58cd93e46f..2f6ecd0d909b 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -881,6 +881,10 @@ void DaemonServer::_check_offlines_pgs( for (const auto& q : pgmap.pg_stat) { set pg_acting; // net acting sets (with no missing if degraded) bool found = false; + if (q.second.state == 0) { + report->unknown.insert(q.first); + continue; + } if (q.second.state & PG_STATE_DEGRADED) { for (auto& anm : q.second.avail_no_missing) { if (osds.count(anm.osd)) { @@ -931,7 +935,9 @@ void DaemonServer::_check_offlines_pgs( } } dout(20) << osds << " -> " << report->ok.size() << " ok, " - << report->not_ok.size() << " not ok" << dendl; + << report->not_ok.size() << " not ok, " + << report->unknown.size() << " unknown" + << dendl; } void DaemonServer::_maximize_ok_to_stop_set( @@ -1744,26 +1750,21 @@ bool DaemonServer::_handle_command( } offline_pg_report out_report; cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { - if (pg_map.num_pg_unknown > 0) { - ss << pg_map.num_pg_unknown << " pgs have unknown state; " - << "cannot draw any conclusions"; - r = -EAGAIN; - return;
- } _maximize_ok_to_stop_set( osds, max, osdmap, pg_map, &out_report); }); - if (r < 0) { - cmdctx->reply(r, ss); - return true; - } if (!f) { f.reset(Formatter::create("json")); } f->dump_object("ok_to_stop", out_report); f->flush(cmdctx->odata); cmdctx->odata.append("\n"); + if (!out_report.unknown.empty()) { + ss << out_report.unknown.size() << " pgs have unknown state; " + << "cannot draw any conclusions"; + cmdctx->reply(-EAGAIN, ss); + } if (!out_report.ok_to_stop()) { ss << "unsafe to stop osd(s)"; cmdctx->reply(-EBUSY, ss); diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index 9119c6743f86..3adcf6ccaf93 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -48,12 +48,12 @@ struct MDSPerfMetricQuery; struct offline_pg_report { set osds; - set ok, not_ok; + set ok, not_ok, unknown; set ok_become_degraded, ok_become_more_degraded; // ok set bad_no_pool, bad_already_inactive, bad_become_inactive; // not ok bool ok_to_stop() const { - return not_ok.empty(); + return not_ok.empty() && unknown.empty(); } void dump(Formatter *f) const { @@ -66,6 +66,15 @@ struct offline_pg_report { f->dump_unsigned("num_ok_pgs", ok.size()); f->dump_unsigned("num_not_ok_pgs", not_ok.size()); + // ambiguous + if (!unknown.empty()) { + f->open_array_section("unknown_pgs"); + for (auto pg : unknown) { + f->dump_stream("pg") << pg; + } + f->close_section(); + } + // bad news if (!bad_no_pool.empty()) { f->open_array_section("bad_no_pool_pgs"); -- 2.47.3