From 47abea157cf415a132d9a35311e5b5362708b8d3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 5 Jun 2018 14:03:42 -0500 Subject: [PATCH] mgr: implement 'device {set,rm}-predicted-failure' commands Record predicted device failure time. Signed-off-by: Sage Weil --- src/mgr/DaemonServer.cc | 83 ++++++++++++++++++++++++++++++++++++++++- src/mgr/DaemonState.cc | 30 +++++++++++++++ src/mgr/DaemonState.h | 31 ++++++++++++++- src/mgr/MgrCommands.h | 6 +++ 4 files changed, 147 insertions(+), 3 deletions(-) diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 111c7883d10..05c30aff308 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -1730,7 +1730,8 @@ bool DaemonServer::handle_command(MCommand *m) cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid); int r = 0; ostringstream rs; - if (!daemon_state.with_device(devid, [&f, &rs] (const DeviceState& dev) { + if (!daemon_state.with_device(devid, + [&f, &rs] (const DeviceState& dev) { if (f) { f->open_object_section("device"); f->dump_string("devid", dev.devid); @@ -1740,11 +1741,24 @@ bool DaemonServer::handle_command(MCommand *m) f->dump_string("daemon", to_string(i)); } f->close_section(); + if (dev.expected_failure != utime_t()) { /* a default utime_t means no prediction is recorded; omit both fields */ + f->dump_stream("expected_failure") << dev.expected_failure; + f->dump_stream("expected_failure_stamp") + << dev.expected_failure_stamp; + } f->close_section(); } else { rs << "device " << dev.devid << "\n"; rs << "host " << dev.server << "\n"; - rs << "daemons " << dev.daemons << "\n"; + set d; /* print the daemon keys via to_string() instead of streaming dev.daemons directly */ + for (auto& j : dev.daemons) { + d.insert(to_string(j)); + } + rs << "daemons " << d << "\n"; + if (dev.expected_failure != utime_t()) { + rs << "expected_failure " << dev.expected_failure + << " (as of " << dev.expected_failure_stamp << ")\n"; + } } })) { ss << "device " << devid << " not found"; @@ -1758,6 +1772,71 @@ bool DaemonServer::handle_command(MCommand *m) } cmdctx->reply(r, ss); return true; + } else if (prefix == "device set-predicted-failure") { + 
string devid; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid); + string when_str; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "when", when_str); + utime_t when; + if (!when.parse(when_str)) { /* unparseable timestamp: reply right away with -EINVAL, nothing is stored */ + ss << "unable to parse datetime '" << when_str << "'"; + r = -EINVAL; + cmdctx->reply(r, ss); + } else { + map meta; + daemon_state.with_device_create(devid, [when, &meta] (DeviceState& dev) { /* record the prediction, then copy the metadata out so it can be persisted below */ + dev.set_expected_failure(when, ceph_clock_now()); + meta = dev.metadata; + }); + json_spirit::Object json_object; + for (auto& i : meta) { + json_spirit::Config::add(json_object, i.first, i.second); + } + bufferlist json; + json.append(json_spirit::write(json_object)); /* NOTE(review): devid is concatenated into the JSON command text below without escaping - confirm it can never contain quote or backslash characters */ + const string cmd = + "{" + "\"prefix\": \"config-key set\", " + "\"key\": \"device/" + devid + "\"" + "}"; + auto on_finish = new ReplyOnFinish(cmdctx); /* presumably replies to cmdctx once the mon command completes - confirm against ReplyOnFinish */ + monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish); + } + return true; + } else if (prefix == "device rm-predicted-failure") { + string devid; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid); + map meta; + if (daemon_state.with_device_write(devid, [&meta] (DeviceState& dev) { + dev.rm_expected_failure(); + meta = dev.metadata; + })) { + string cmd; + bufferlist json; + if (meta.empty()) { /* no metadata left: issue config-key rm for the device key instead of storing an empty object */ + cmd = + "{" + "\"prefix\": \"config-key rm\", " + "\"key\": \"device/" + devid + "\"" + "}"; + } else { + json_spirit::Object json_object; + for (auto& i : meta) { + json_spirit::Config::add(json_object, i.first, i.second); + } + json.append(json_spirit::write(json_object)); + cmd = + "{" + "\"prefix\": \"config-key set\", " + "\"key\": \"device/" + devid + "\"" + "}"; + } + auto on_finish = new ReplyOnFinish(cmdctx); + monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish); + } else { /* with_device_write returned false, so there is nothing to clear; reply with success (0) */ + cmdctx->reply(0, ss); + } + return true; } else { // fall back to feeding command to PGMap r = cluster_state.with_pgmap([&](const PGMap& pg_map) { diff --git a/src/mgr/DaemonState.cc b/src/mgr/DaemonState.cc index e8daf7dfd0e..60eefb7dfe7 
100644 --- a/src/mgr/DaemonState.cc +++ b/src/mgr/DaemonState.cc @@ -14,12 +14,42 @@ #include "DaemonState.h" #include "MgrSession.h" +#include "include/stringify.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr #undef dout_prefix #define dout_prefix *_dout << "mgr " << __func__ << " " +/* Adopt m as the device metadata, then refresh the cached expected_failure and expected_failure_stamp from the matching keys when they are present. NOTE(review): the parse() results are ignored and an absent key leaves the cached value untouched - confirm both are intended. */ void DeviceState::set_metadata(map&& m) +{ + metadata = std::move(m); + auto p = metadata.find("expected_failure"); + if (p != metadata.end()) { + expected_failure.parse(p->second); + } + p = metadata.find("expected_failure_stamp"); + if (p != metadata.end()) { + expected_failure_stamp.parse(p->second); + } +} + /* Store the predicted failure time (when) and the time it was recorded (now), mirroring both into metadata as stringified values. */ +void DeviceState::set_expected_failure(utime_t when, utime_t now) +{ + expected_failure = when; + expected_failure_stamp = now; + metadata["expected_failure"] = stringify(expected_failure); + metadata["expected_failure_stamp"] = stringify(expected_failure_stamp); +} + /* Clear the prediction: reset both timestamps to utime_t() and erase the two corresponding metadata keys. */ +void DeviceState::rm_expected_failure() +{ + expected_failure = utime_t(); + expected_failure_stamp = utime_t(); + metadata.erase("expected_failure"); + metadata.erase("expected_failure_stamp"); +} + void DaemonStateIndex::insert(DaemonStatePtr dm) { RWLock::WLocker l(lock); diff --git a/src/mgr/DaemonState.h b/src/mgr/DaemonState.h index 654c12beb7e..27190b95941 100644 --- a/src/mgr/DaemonState.h +++ b/src/mgr/DaemonState.h @@ -193,10 +193,16 @@ struct DeviceState : public RefCountedObject std::map metadata; ///< persistent metadata + utime_t expected_failure; ///< when device failure is expected + utime_t expected_failure_stamp; ///< when expected_failure was recorded + DeviceState(const std::string& n) : devid(n) {} void set_metadata(map&& m); + void set_expected_failure(utime_t when, utime_t now); + void rm_expected_failure(); + /// true of we can be safely forgotten/removed from memory bool empty() const { return daemons.empty() && metadata.empty(); @@ -263,7 +269,7 @@ public: } template - auto with_device(const std::string& dev, + bool with_device(const std::string& dev, 
Callback&& cb, Args&&... args) const { RWLock::RLocker l(lock); auto p = devices.find(dev); @@ -274,6 +280,29 @@ public: return true; } + /* Look up dev while holding the write lock and invoke cb on it; returns false without calling cb when dev is not in devices. If the device reports empty() after cb has run, it is passed to _erase_device. */ template + bool with_device_write(const std::string& dev, + Callback&& cb, Args&&... args) { + RWLock::WLocker l(lock); + auto p = devices.find(dev); + if (p == devices.end()) { + return false; + } + std::forward(cb)(*p->second, std::forward(args)...); + if (p->second->empty()) { + _erase_device(p->second); + } + return true; + } + /* Invoke cb, under the write lock, on the device returned by _get_or_create_device(dev) - presumably created on demand when absent; confirm against that helper. */ + template + void with_device_create(const std::string& dev, + Callback&& cb, Args&&... args) { + RWLock::WLocker l(lock); + auto d = _get_or_create_device(dev); + std::forward(cb)(*d, std::forward(args)...); + } + template void with_devices(Callback&& cb, Args&&... args) const { RWLock::RLocker l(lock); diff --git a/src/mgr/MgrCommands.h b/src/mgr/MgrCommands.h index 4415aff00d2..c3870c681f9 100644 --- a/src/mgr/MgrCommands.h +++ b/src/mgr/MgrCommands.h @@ -153,3 +153,9 @@ COMMAND("device ls-by-daemon name=who,type=CephString", COMMAND("device ls-by-host name=host,type=CephString", "Show devices on a host", "mgr", "r", "cli,rest") +COMMAND("device set-predicted-failure name=devid,type=CephString name=when,type=CephString", + "Set predicted device failure time", + "mgr", "rw", "cli,rest") +COMMAND("device rm-predicted-failure name=devid,type=CephString", + "Clear predicted device failure time", + "mgr", "rw", "cli,rest") -- 2.39.5