From 4920bbd6cb49290122eaa0542b7a81ffaa0f4aed Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 7 Jun 2018 21:22:47 -0500 Subject: [PATCH] mgr: "predicted failure" -> "life expectancy", and as a range 1- Change terminology from "predicted failure" to "life expectancy", which has a more intuitive meaning. 2- Change from an expected time stamp to a range. Any expectancy has an inherent uncertainty, so a single time is not meaningful. Instead, express as a range, e.g. "4-6 weeks from now", where we record the min and max date as well as when the prediction was made. This is still a bit awkward, but I'm not sure what is better. It will always be a bit awkward to express uncertainty since in precise terms it is probably a 90% confidence interval or something. Signed-off-by: Sage Weil --- src/mgr/DaemonServer.cc | 35 ++++++++++++++---------- src/mgr/DaemonState.cc | 59 ++++++++++++++++++++++++++--------------- src/mgr/DaemonState.h | 10 ++++--- src/mgr/MgrCommands.h | 10 ++++--- 4 files changed, 71 insertions(+), 43 deletions(-) diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 3dcfcf5c9a2..777c84db845 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -1670,7 +1670,7 @@ bool DaemonServer::handle_command(MCommand *m) tbl.define_column("DEVICE", TextTable::LEFT, TextTable::LEFT); tbl.define_column("HOST:DEV", TextTable::LEFT, TextTable::LEFT); tbl.define_column("DAEMONS", TextTable::LEFT, TextTable::LEFT); - tbl.define_column("EXPECTED FAILURE", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("LIFE EXPECTANCY", TextTable::LEFT, TextTable::LEFT); daemon_state.with_devices([&tbl](const DeviceState& dev) { string h; for (auto& i : dev.devnames) { @@ -1689,7 +1689,7 @@ bool DaemonServer::handle_command(MCommand *m) tbl << dev.devid << h << d - << stringify(dev.expected_failure) + << dev.get_life_expectancy_str() << TextTable::endrow; }); cmdctx->odata.append(stringify(tbl)); @@ -1732,7 +1732,7 @@ bool 
DaemonServer::handle_command(MCommand *m) } tbl << dev.devid << h - << stringify(dev.expected_failure) + << dev.get_life_expectancy_str() << TextTable::endrow; }); } @@ -1787,7 +1787,7 @@ bool DaemonServer::handle_command(MCommand *m) tbl << dev.devid << n << d - << stringify(dev.expected_failure) + << dev.get_life_expectancy_str() << TextTable::endrow; }); } @@ -1819,20 +1819,27 @@ bool DaemonServer::handle_command(MCommand *m) } cmdctx->reply(r, ss); return true; - } else if (prefix == "device set-predicted-failure") { + } else if (prefix == "device set-life-expectancy") { string devid; cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid); - string when_str; - cmd_getval(g_ceph_context, cmdctx->cmdmap, "when", when_str); - utime_t when; - if (!when.parse(when_str)) { - ss << "unable to parse datetime '" << when_str << "'"; + string from_str, to_str; + cmd_getval(g_ceph_context, cmdctx->cmdmap, "from", from_str); + cmd_getval(g_ceph_context, cmdctx->cmdmap, "to", to_str); + utime_t from, to; + if (!from.parse(from_str)) { + ss << "unable to parse datetime '" << from_str << "'"; + r = -EINVAL; + cmdctx->reply(r, ss); + } else if (!to.parse(to_str)) { + ss << "unable to parse datetime '" << to_str << "'"; r = -EINVAL; cmdctx->reply(r, ss); } else { map meta; - daemon_state.with_device_create(devid, [when, &meta] (DeviceState& dev) { - dev.set_expected_failure(when, ceph_clock_now()); + daemon_state.with_device_create( + devid, + [from, to, &meta] (DeviceState& dev) { + dev.set_life_expectancy(from, to, ceph_clock_now()); meta = dev.metadata; }); json_spirit::Object json_object; @@ -1850,12 +1857,12 @@ bool DaemonServer::handle_command(MCommand *m) monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish); } return true; - } else if (prefix == "device rm-predicted-failure") { + } else if (prefix == "device rm-life-expectancy") { string devid; cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid); map meta; if (daemon_state.with_device_write(devid, 
[&meta] (DeviceState& dev) { - dev.rm_expected_failure(); + dev.rm_life_expectancy(); meta = dev.metadata; })) { string cmd; diff --git a/src/mgr/DaemonState.cc b/src/mgr/DaemonState.cc index 824e606fa36..1e1ebdb67cc 100644 --- a/src/mgr/DaemonState.cc +++ b/src/mgr/DaemonState.cc @@ -25,30 +25,45 @@ void DeviceState::set_metadata(map&& m) { metadata = std::move(m); - auto p = metadata.find("expected_failure"); + auto p = metadata.find("life_expectancy_min"); if (p != metadata.end()) { - expected_failure.parse(p->second); + life_expectancy.first.parse(p->second); } - p = metadata.find("expected_failure_stamp"); + p = metadata.find("life_expectancy_max"); if (p != metadata.end()) { - expected_failure_stamp.parse(p->second); + life_expectancy.second.parse(p->second); } + p = metadata.find("life_expectancy_stamp"); + if (p != metadata.end()) { + life_expectancy_stamp.parse(p->second); + } +} + +void DeviceState::set_life_expectancy(utime_t from, utime_t to, utime_t now) +{ + life_expectancy = make_pair(from, to); + life_expectancy_stamp = now; + metadata["life_expectancy_min"] = stringify(life_expectancy.first); + metadata["life_expectancy_max"] = stringify(life_expectancy.second); + metadata["life_expectancy_stamp"] = stringify(life_expectancy_stamp); } -void DeviceState::set_expected_failure(utime_t when, utime_t now) +void DeviceState::rm_life_expectancy() { - expected_failure = when; - expected_failure_stamp = now; - metadata["expected_failure"] = stringify(expected_failure); - metadata["expected_failure_stamp"] = stringify(expected_failure_stamp); + life_expectancy = make_pair(utime_t(), utime_t()); + life_expectancy_stamp = utime_t(); + metadata.erase("life_expectancy_min"); + metadata.erase("life_expectancy_max"); + metadata.erase("life_expectancy_stamp"); } -void DeviceState::rm_expected_failure() +string DeviceState::get_life_expectancy_str() const { - expected_failure = utime_t(); - expected_failure_stamp = utime_t(); - metadata.erase("expected_failure"); - 
metadata.erase("expected_failure_stamp"); + if (life_expectancy.first == utime_t()) { + return string(); + } + return stringify(life_expectancy.first) + " to " + + stringify(life_expectancy.second); } void DeviceState::dump(Formatter *f) const @@ -67,10 +82,11 @@ void DeviceState::dump(Formatter *f) const f->dump_string("daemon", to_string(i)); } f->close_section(); - if (expected_failure != utime_t()) { - f->dump_stream("expected_failure") << expected_failure; - f->dump_stream("expected_failure_stamp") - << expected_failure_stamp; + if (life_expectancy.first != utime_t()) { + f->dump_stream("life_expectancy_min") << life_expectancy.first; + f->dump_stream("life_expectancy_max") << life_expectancy.second; + f->dump_stream("life_expectancy_stamp") + << life_expectancy_stamp; } } @@ -85,9 +101,10 @@ void DeviceState::print(ostream& out) const d.insert(to_string(j)); } out << "daemons " << d << "\n"; - if (expected_failure != utime_t()) { - out << "expected_failure " << expected_failure - << " (as of " << expected_failure_stamp << ")\n"; + if (life_expectancy.first != utime_t()) { + out << "life_expectancy " << life_expectancy.first << " to " + << life_expectancy.second + << " (as of " << life_expectancy_stamp << ")\n"; } } diff --git a/src/mgr/DaemonState.h b/src/mgr/DaemonState.h index 2ceedb926bb..6f9f4f3d93d 100644 --- a/src/mgr/DaemonState.h +++ b/src/mgr/DaemonState.h @@ -198,15 +198,17 @@ struct DeviceState : public RefCountedObject std::map metadata; ///< persistent metadata - utime_t expected_failure; ///< when device failure is expected - utime_t expected_failure_stamp; ///< when expected_failure was recorded + pair life_expectancy; ///< when device failure is expected + utime_t life_expectancy_stamp; ///< when life expectancy was recorded DeviceState(const std::string& n) : devid(n) {} void set_metadata(map&& m); - void set_expected_failure(utime_t when, utime_t now); - void rm_expected_failure(); + void set_life_expectancy(utime_t from, utime_t to, utime_t
now); + void rm_life_expectancy(); + + string get_life_expectancy_str() const; /// true of we can be safely forgotten/removed from memory bool empty() const { diff --git a/src/mgr/MgrCommands.h b/src/mgr/MgrCommands.h index c3870c681f9..746ed02e52e 100644 --- a/src/mgr/MgrCommands.h +++ b/src/mgr/MgrCommands.h @@ -153,9 +153,11 @@ COMMAND("device ls-by-daemon name=who,type=CephString", COMMAND("device ls-by-host name=host,type=CephString", "Show devices on a host", "mgr", "r", "cli,rest") -COMMAND("device set-predicted-failure name=devid,type=CephString name=when,type=CephString", - "Set predicted device failure time", +COMMAND("device set-life-expectancy name=devid,type=CephString "\ + "name=from,type=CephString "\ + "name=to,type=CephString", + "Set predicted device life expectancy", "mgr", "rw", "cli,rest") -COMMAND("device rm-predicted-failure name=devid,type=CephString", - "Clear predicted device failure time", +COMMAND("device rm-life-expectancy name=devid,type=CephString", + "Clear predicted device life expectancy", "mgr", "rw", "cli,rest") -- 2.39.5