From: Sage Weil Date: Thu, 10 Aug 2017 18:06:02 +0000 (-0400) Subject: mgr: implement 'osd safe-to-destroy' and 'ok-to-stop' commands X-Git-Tag: v12.1.4~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6808af486d3bdce966232810004648502c967b13;p=ceph.git mgr: implement 'osd safe-to-destroy' and 'ok-to-stop' commands An osd is safe to destroy if - we have osd_stat for it - osd_stat indicates no pgs stored - all pgs are known - no pgs map to it An osd is ok to stop if - we have pg stats - no pgs will drop below min_size Signed-off-by: Sage Weil (cherry picked from commit bf9380457bba5a834ffa2927c73165e0f1960332) --- diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst index 0f372d9714be5..a95b4a7e28358 100644 --- a/doc/man/8/ceph.rst +++ b/doc/man/8/ceph.rst @@ -39,7 +39,7 @@ Synopsis | **ceph** **mon_status** -| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ... +| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *ok-to-stop* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *safe-to-destroy* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ... 
| **ceph** **osd** **crush** [ *add* \| *add-bucket* \| *create-or-move* \| *dump* \| *get-tunable* \| *link* \| *move* \| *remove* \| *rename-bucket* \| *reweight* \| *reweight-all* \| *reweight-subtree* \| *rm* \| *rule* \| *set* \| *set-tunable* \| *show-tunables* \| *tunables* \| *unlink* ] ... @@ -874,6 +874,18 @@ Usage:: ceph osd out [...] +Subcommand ``ok-to-stop`` checks whether the list of OSD(s) can be +stopped without immediately making data unavailable. That is, all +data should remain readable and writeable, although data redundancy +may be reduced as some PGs may end up in a degraded (but active) +state. It will return a success code if it is okay to stop the +OSD(s), or an error code and informative message if it is not or if no +conclusion can be drawn at the current time. + +Usage:: + + ceph osd ok-to-stop [...] + Subcommand ``pause`` pauses osd. Usage:: @@ -1066,6 +1078,16 @@ Usage:: ceph osd purge {--yes-i-really-mean-it} +Subcommand ``safe-to-destroy`` checks whether it is safe to remove or +destroy an OSD without reducing overall data redundancy or durability. +It will return a success code if it is definitely safe, or an error +code and informative message if it is not or if no conclusion can be +drawn at the current time. + +Usage:: + + ceph osd safe-to-destroy [...] + Subcommand ``scrub`` initiates scrub on specified osd. Usage:: diff --git a/doc/release-notes.rst b/doc/release-notes.rst index db0dbbc8b049b..76a8977806dba 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -210,6 +210,11 @@ Major Changes from Kraken - ``ceph osd {add,rm}-{noout,noin,nodown,noup}`` allow the `noout`, `noin`, `nodown`, and `noup` flags to be applied to specific OSDs. + - ``ceph osd safe-to-destroy `` will report whether it is safe to + remove or destroy OSD(s) without reducing data durability or redundancy. 
+ - ``ceph osd ok-to-stop `` will report whether it is okay to stop + OSD(s) without immediately compromising availability (i.e., all PGs + should remain active but may be degraded). - ``ceph log last [n]`` will output the last *n* lines of the cluster log. - ``ceph mgr dump`` will dump the MgrMap, including the currently active diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index a58675ed24c07..34aac18718188 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -924,6 +924,163 @@ bool DaemonServer::handle_command(MCommand *m) }); cmdctx->reply(r, ""); return true; + } else if (prefix == "osd safe-to-destroy") { + vector ids; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids); + set osds; + int r; + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + r = osdmap.parse_osd_id_list(ids, &osds, &ss); + }); + if (!r && osds.empty()) { + ss << "must specify one or more OSDs"; + r = -EINVAL; + } + if (r < 0) { + cmdctx->reply(r, ss); + return true; + } + set active_osds, missing_stats, stored_pgs; + int affected_pgs = 0; + cluster_state.with_pgmap([&](const PGMap& pg_map) { + if (pg_map.num_pg_unknown > 0) { + ss << pg_map.num_pg_unknown << " pgs have unknown state; cannot draw" + << " any conclusions"; + r = -EAGAIN; + return; + } + int num_active_clean = 0; + for (auto& p : pg_map.num_pg_by_state) { + unsigned want = PG_STATE_ACTIVE|PG_STATE_CLEAN; + if ((p.first & want) == want) { + num_active_clean += p.second; + } + } + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + for (auto osd : osds) { + if (!osdmap.exists(osd)) { + continue; // clearly safe to destroy + } + auto q = pg_map.num_pg_by_osd.find(osd); + if (q != pg_map.num_pg_by_osd.end()) { + if (q->second.acting > 0 || q->second.up > 0) { + active_osds.insert(osd); + affected_pgs += q->second.acting + q->second.up; + continue; + } + } + if (num_active_clean < pg_map.num_pg) { + // all pgs aren't active+clean; we need to be careful. 
+ auto p = pg_map.osd_stat.find(osd); + if (p == pg_map.osd_stat.end()) { + missing_stats.insert(osd); + } + if (p->second.num_pgs > 0) { + stored_pgs.insert(osd); + } + } + } + }); + }); + if (!r && !active_osds.empty()) { + ss << "OSD(s) " << active_osds << " have " << affected_pgs + << " pgs currently mapped to them"; + r = -EBUSY; + } else if (!missing_stats.empty()) { + ss << "OSD(s) " << missing_stats << " have no reported stats, and not all" + << " PGs are active+clean; we cannot draw any conclusions"; + r = -EAGAIN; + } else if (!stored_pgs.empty()) { + ss << "OSD(s) " << stored_pgs << " last reported they still store some PG" + << " data, and not all PGs are active+clean; we cannot be sure they" + << " aren't still needed."; + r = -EBUSY; + } + if (r) { + cmdctx->reply(r, ss); + return true; + } + ss << "OSD(s) " << osds << " are safe to destroy without reducing data" + << " durability."; + cmdctx->reply(0, ss); + return true; + } else if (prefix == "osd ok-to-stop") { + vector ids; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids); + set osds; + int r; + cluster_state.with_osdmap([&](const OSDMap& osdmap) { + r = osdmap.parse_osd_id_list(ids, &osds, &ss); + }); + if (!r && osds.empty()) { + ss << "must specify one or more OSDs"; + r = -EINVAL; + } + if (r < 0) { + cmdctx->reply(r, ss); + return true; + } + map pg_delta; // pgid -> net acting set size change + int dangerous_pgs = 0; + cluster_state.with_pgmap([&](const PGMap& pg_map) { + return cluster_state.with_osdmap([&](const OSDMap& osdmap) { + if (pg_map.num_pg_unknown > 0) { + ss << pg_map.num_pg_unknown << " pgs have unknown state; " + << "cannot draw any conclusions"; + r = -EAGAIN; + return; + } + for (auto osd : osds) { + auto p = pg_map.pg_by_osd.find(osd); + if (p != pg_map.pg_by_osd.end()) { + for (auto& pgid : p->second) { + --pg_delta[pgid]; + } + } + } + for (auto& p : pg_delta) { + auto q = pg_map.pg_stat.find(p.first); + if (q == pg_map.pg_stat.end()) { + ss << "missing 
information about " << p.first << "; cannot draw" + << " any conclusions"; + r = -EAGAIN; + return; + } + if (!(q->second.state & PG_STATE_ACTIVE) || + (q->second.state & PG_STATE_DEGRADED)) { + // we don't currently have a good way to tell *how* degraded + // a degraded PG is, so we have to assume we cannot remove + // any more replicas/shards. + ++dangerous_pgs; + continue; + } + const pg_pool_t *pi = osdmap.get_pg_pool(p.first.pool()); + if (!pi) { + ++dangerous_pgs; // pool is creating or deleting + } else { + if (q->second.acting.size() + p.second < pi->min_size) { + ++dangerous_pgs; + } + } + } + }); + }); + if (r) { + cmdctx->reply(r, ss); + return true; + } + if (dangerous_pgs) { + ss << dangerous_pgs << " PGs are already degraded or might become " + << "unavailable"; + cmdctx->reply(-EBUSY, ss); + return true; + } + ss << "OSD(s) " << osds << " are ok to stop without reducing" + << " availability, provided there are no other concurrent failures" + << " or interventions. " << pg_delta.size() << " PGs are likely to be" + << " degraded (but remain available) as a result."; + cmdctx->reply(0, ss); + return true; } else if (prefix == "pg force-recovery" || prefix == "pg force-backfill" || prefix == "pg cancel-force-recovery" || diff --git a/src/mgr/MgrCommands.h b/src/mgr/MgrCommands.h index e63bfc48c79bc..1818454e1fcc4 100644 --- a/src/mgr/MgrCommands.h +++ b/src/mgr/MgrCommands.h @@ -107,6 +107,13 @@ COMMAND("osd test-reweight-by-pg " \ "dry run of reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \ "osd", "r", "cli,rest") +COMMAND("osd safe-to-destroy name=ids,type=CephString,n=N", + "check whether osd(s) can be safely destroyed without reducing data durability", + "osd", "r", "cli,rest") +COMMAND("osd ok-to-stop name=ids,type=CephString,n=N", + "check whether osd(s) can be safely stopped without reducing immediate"\ + " data availability", "osd", "r", "cli,rest") + COMMAND("osd scrub " \ "name=who,type=CephString", \ 
"initiate scrub on osd , or use to scrub all", \