"#client-authentication";
}
-int DaemonServer::_check_offlines_pgs(
+// Rebuild *report for the proposed offline set `osds`.
+//
+// *report is reset first and report->osds is set to `osds`.  Then every
+// PG in pgmap.pg_stat that gets past the `found` filter is placed in
+// exactly one of report->ok / report->not_ok:
+//  - not_ok PGs are also recorded in each reason set that applies
+//    (bad_no_pool, bad_already_inactive, bad_become_inactive);
+//  - ok PGs are also recorded in ok_become_more_degraded when their state
+//    already has PG_STATE_DEGRADED set, otherwise in ok_become_degraded.
+//
+// NOTE(review): the code that fills `pg_acting` and sets `found` is not
+// part of this hunk.
+//
+// Nothing is returned; callers inspect *report (e.g. ok_to_stop()).
+void DaemonServer::_check_offlines_pgs(
const set<int>& osds,
const OSDMap& osdmap,
const PGMap& pgmap,
- int *out_touched_pgs)
+ offline_pg_report *report)
{
- int touched_pgs = 0;
- int dangerous_pgs = 0;
+ // reset output
+ *report = offline_pg_report();
+ report->osds = osds;
+
for (const auto& q : pgmap.pg_stat) {
set<int32_t> pg_acting; // net acting sets (with no missing if degraded)
bool found = false;
if (!found) {
continue;
}
- touched_pgs++;
const pg_pool_t *pi = osdmap.get_pg_pool(q.first.pool());
+ bool dangerous = false;
if (!pi) {
- ++dangerous_pgs; // pool is creating or deleting
- continue;
+ report->bad_no_pool.insert(q.first); // pool is creating or deleting
+ dangerous = true;
}
if (!(q.second.state & PG_STATE_ACTIVE)) {
- ++dangerous_pgs;
- continue;
+ report->bad_already_inactive.insert(q.first);
+ dangerous = true;
}
- if (pg_acting.size() < pi->min_size) {
- ++dangerous_pgs;
+ // The !pi case above no longer skips the rest of the loop body, so pi may
+ // be null here; without a pool there is no min_size to compare against.
+ if (pi && pg_acting.size() < pi->min_size) {
+ report->bad_become_inactive.insert(q.first);
+ dangerous = true;
+ }
+ if (dangerous) {
+ report->not_ok.insert(q.first);
+ } else {
+ report->ok.insert(q.first);
+ if (q.second.state & PG_STATE_DEGRADED) {
+ report->ok_become_more_degraded.insert(q.first);
+ } else {
+ report->ok_become_degraded.insert(q.first);
+ }
}
}
- dout(20) << osds << " -> " << dangerous_pgs << "/" << touched_pgs
- << " dangerous/touched" << dendl;
- *out_touched_pgs = touched_pgs;
- return dangerous_pgs;
+ dout(20) << osds << " -> " << report->ok.size() << " ok, "
+ << report->not_ok.size() << " not ok" << dendl;
}
-int DaemonServer::_maximize_ok_to_stop_set(
+void DaemonServer::_maximize_ok_to_stop_set( // try to grow orig_osds, one OSD at a time, up to `max` OSDs that still pass _check_offlines_pgs
const set<int>& orig_osds,
unsigned max,
const OSDMap& osdmap,
const PGMap& pgmap,
- int *out_touched_pgs,
- set<int> *out_osds)
+ offline_pg_report *out_report) // out: report for orig_osds, replaced by the report of each larger set that passes
{
dout(20) << "orig_osds " << orig_osds << " max " << max << dendl;
- *out_osds = orig_osds;
- int r = _check_offlines_pgs(orig_osds, osdmap, pgmap, out_touched_pgs);
- if (r > 0) {
- return r;
+ _check_offlines_pgs(orig_osds, osdmap, pgmap, out_report); // evaluate the caller's set exactly as given
+ if (!out_report->ok_to_stop()) { // the original set is already unsafe: leave its report in *out_report
+ return;
}
- if (orig_osds.size() == max) {
+ if (orig_osds.size() >= max) {
// already at max
- return 0;
+ return;
}
// semi-arbitrarily start with the first osd in the set
+ offline_pg_report report; // scratch report, overwritten for every candidate set
set<int> osds = orig_osds;
int parent = *osds.begin();
set<int> children;
while (true) {
// identify the next parent
- r = osdmap.crush->get_immediate_parent_id(parent, &parent);
+ int r = osdmap.crush->get_immediate_parent_id(parent, &parent); // `parent` is both input and output: move one level up
if (r < 0) {
- return 0; // just go with what we have so far!
+ return; // just go with what we have so far!
}
// get candidate additions that are beneath this point in the tree
children.clear();
r = osdmap.crush->get_all_children(parent, &children);
if (r < 0) {
- return 0; // just go with what we have so far!
+ return; // just go with what we have so far!
}
dout(20) << " parent " << parent << " children " << children << dendl;
for (auto o : children) {
if (o >= 0 && osdmap.is_up(o) && osds.count(o) == 0) {
osds.insert(o);
- int touched;
- r = _check_offlines_pgs(osds, osdmap, pgmap, &touched);
- if (r > 0) {
+ _check_offlines_pgs(osds, osdmap, pgmap, &report); // re-evaluate with candidate o included
+ if (!report.ok_to_stop()) { // adding o makes the set unsafe: back it out and count the failure
osds.erase(o);
++failed;
continue;
}
- *out_osds = osds;
- *out_touched_pgs = touched;
+ *out_report = report; // candidate accepted: publish the larger set as the best result so far
if (osds.size() == max) {
dout(20) << " hit max" << dendl;
- return 0; // yay, we hit the max
+ return; // yay, we hit the max
}
}
}
if (failed) {
// we hit some failures; go with what we have
dout(20) << " hit some peer failures" << dendl;
- return 0;
+ return; // NOTE(review): `failed` is declared in context this hunk does not show
}
}
}
set<int> osds;
int64_t max = 1;
cmd_getval(cmdctx->cmdmap, "max", max);
- if (max < (int)osds.size()) {
- max = osds.size();
- }
int r;
cluster_state.with_osdmap([&](const OSDMap& osdmap) {
r = osdmap.parse_osd_id_list(ids, &osds, &ss);
ss << "must specify one or more OSDs";
r = -EINVAL;
}
+ if (max < (int)osds.size()) {
+ max = osds.size();
+ }
if (r < 0) {
cmdctx->reply(r, ss);
return true;
}
- set<int> out_osds;
- int touched_pgs = 0;
- int dangerous_pgs = 0;
+ offline_pg_report out_report;
cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) {
if (pg_map.num_pg_unknown > 0) {
ss << pg_map.num_pg_unknown << " pgs have unknown state; "
r = -EAGAIN;
return;
}
- dangerous_pgs = _maximize_ok_to_stop_set(
+ _maximize_ok_to_stop_set(
osds, max, osdmap, pg_map,
- &touched_pgs, &out_osds);
+ &out_report);
});
if (r < 0) {
cmdctx->reply(r, ss);
return true;
}
- if (dangerous_pgs) {
- ss << dangerous_pgs << " PGs are already too degraded, would become"
- << " too degraded or might become unavailable";
- cmdctx->reply(-EBUSY, ss);
- return true;
- }
- ss << "These OSD(s) are ok to stop without reducing"
- << " availability or risking data, provided there are no other concurrent failures"
- << " or interventions." << std::endl;
- ss << touched_pgs << " PGs are likely to be"
- << " degraded (but remain available) as a result.";
if (!f) {
f.reset(Formatter::create("json"));
}
- f->open_array_section("osds");
- for (auto o : out_osds) {
- f->dump_int("osd", o);
- }
- f->close_section();
+ f->dump_object("ok_to_stop", out_report);
f->flush(cmdctx->odata);
cmdctx->odata.append("\n");
- cmdctx->reply(0, ss);
+ if (!out_report.ok_to_stop()) {
+ ss << "unsafe to stop osd(s)";
+ cmdctx->reply(-EBUSY, ss);
+ } else {
+ cmdctx->reply(0, ss);
+ }
return true;
} else if (prefix == "pg force-recovery" ||
prefix == "pg force-backfill" ||
struct OSDPerfMetricQuery;
struct MDSPerfMetricQuery;
+
+/// Result of evaluating whether a set of OSDs may be taken offline.
+///
+/// Every PG that was evaluated lands in exactly one of `ok` / `not_ok`; the
+/// remaining sets record why:
+///  - ok_become_degraded / ok_become_more_degraded: ok PGs, split by
+///    whether the PG's state already had PG_STATE_DEGRADED set;
+///  - bad_no_pool: no pg_pool_t could be looked up for the PG's pool;
+///  - bad_already_inactive: the PG's state lacks PG_STATE_ACTIVE;
+///  - bad_become_inactive: the net acting set is smaller than min_size.
+struct offline_pg_report {
+  set<int> osds;
+  set<pg_t> ok, not_ok;
+  set<pg_t> ok_become_degraded, ok_become_more_degraded; // ok
+  set<pg_t> bad_no_pool, bad_already_inactive, bad_become_inactive; // not ok
+
+  /// True when no evaluated PG was classified as not ok.
+  bool ok_to_stop() const {
+    return not_ok.empty();
+  }
+
+  /// Write the report to `f`: the verdict, the OSD ids, the ok / not-ok
+  /// counts, and then one array per non-empty reason set.
+  void dump(Formatter *f) const {
+    // Emit `pgs` as an array section called `section`; an empty set
+    // produces no output at all.
+    auto dump_pg_set = [f](const char *section, const set<pg_t>& pgs) {
+      if (pgs.empty()) {
+        return;
+      }
+      f->open_array_section(section);
+      for (const auto& pgid : pgs) {
+        f->dump_stream("pg") << pgid;
+      }
+      f->close_section();
+    };
+
+    f->dump_bool("ok_to_stop", ok_to_stop());
+
+    f->open_array_section("osds");
+    for (const int osd : osds) {
+      f->dump_int("osd", osd);
+    }
+    f->close_section();
+
+    f->dump_unsigned("num_ok_pgs", ok.size());
+    f->dump_unsigned("num_not_ok_pgs", not_ok.size());
+
+    // bad news
+    dump_pg_set("bad_no_pool_pgs", bad_no_pool);
+    dump_pg_set("bad_already_inactive", bad_already_inactive);
+    dump_pg_set("bad_become_inactive", bad_become_inactive);
+
+    // informative
+    dump_pg_set("ok_become_degraded", ok_become_degraded);
+    dump_pg_set("ok_become_more_degraded", ok_become_more_degraded);
+  }
+};
+
+
/**
* Server used in ceph-mgr to communicate with Ceph daemons like
* MDSs and OSDs.
void _prune_pending_service_map();
- int _check_offlines_pgs(
+ void _check_offlines_pgs( // classifies PGs for the offline set `osds`; the result is delivered through *report
const set<int>& osds,
const OSDMap& osdmap,
const PGMap& pgmap,
- int *out_touched_pgs);
- int _maximize_ok_to_stop_set(
+ offline_pg_report *report); // out: reset, then filled in by the call
+ void _maximize_ok_to_stop_set( // tries to extend orig_osds up to `max` OSDs that are still ok to stop
const set<int>& orig_osds,
unsigned max,
const OSDMap& osdmap,
const PGMap& pgmap,
- int *out_touched_pgs,
- set<int> *out_osds);
+ offline_pg_report *report); // out: report for the chosen set (report->osds names the OSDs)
utime_t started_at;
std::atomic<bool> pgmap_ready;