git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr: "predicted failure" -> "life expectancy", and as a range
author Sage Weil <sage@redhat.com>
Fri, 8 Jun 2018 02:22:47 +0000 (21:22 -0500)
committer Sage Weil <sage@redhat.com>
Mon, 11 Jun 2018 12:29:04 +0000 (07:29 -0500)
1- Change terminology from "predicted failure" to "life expectancy", which
has a more intuitive meaning.

2- Change from an expected time stamp to a range.  Any expectancy has an
inherent uncertainty, so a single time is not meaningful.  Instead, express
as a range, e.g. "4-6 weeks from now", where we record the min and max
date as well as when the prediction was made.

This is still a bit awkward, but I'm not sure what is better.  It will
always be a bit awkward to express uncertainty since in precise terms it
is probably a 90% confidence interval or something.

Signed-off-by: Sage Weil <sage@redhat.com>
src/mgr/DaemonServer.cc
src/mgr/DaemonState.cc
src/mgr/DaemonState.h
src/mgr/MgrCommands.h

index 3dcfcf5c9a28fc8cca427637ec41442b94ef97e5..777c84db845c9743d975b40e4ee7f9a08e622a46 100644 (file)
@@ -1670,7 +1670,7 @@ bool DaemonServer::handle_command(MCommand *m)
       tbl.define_column("DEVICE", TextTable::LEFT, TextTable::LEFT);
       tbl.define_column("HOST:DEV", TextTable::LEFT, TextTable::LEFT);
       tbl.define_column("DAEMONS", TextTable::LEFT, TextTable::LEFT);
-      tbl.define_column("EXPECTED FAILURE", TextTable::LEFT, TextTable::LEFT);
+      tbl.define_column("LIFE EXPECTANCY", TextTable::LEFT, TextTable::LEFT);
       daemon_state.with_devices([&tbl](const DeviceState& dev) {
          string h;
          for (auto& i : dev.devnames) {
@@ -1689,7 +1689,7 @@ bool DaemonServer::handle_command(MCommand *m)
          tbl << dev.devid
              << h
              << d
-             << stringify(dev.expected_failure)
+             << dev.get_life_expectancy_str()
              << TextTable::endrow;
        });
       cmdctx->odata.append(stringify(tbl));
@@ -1732,7 +1732,7 @@ bool DaemonServer::handle_command(MCommand *m)
                }
                tbl << dev.devid
                    << h
-                   << stringify(dev.expected_failure)
+                   << dev.get_life_expectancy_str()
                    << TextTable::endrow;
              });
          }
@@ -1787,7 +1787,7 @@ bool DaemonServer::handle_command(MCommand *m)
            tbl << dev.devid
                << n
                << d
-               << stringify(dev.expected_failure)
+               << dev.get_life_expectancy_str()
                << TextTable::endrow;
          });
       }
@@ -1819,20 +1819,27 @@ bool DaemonServer::handle_command(MCommand *m)
     }
     cmdctx->reply(r, ss);
     return true;
-  } else if (prefix == "device set-predicted-failure") {
+  } else if (prefix == "device set-life-expectancy") {
     string devid;
     cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid);
-    string when_str;
-    cmd_getval(g_ceph_context, cmdctx->cmdmap, "when", when_str);
-    utime_t when;
-    if (!when.parse(when_str)) {
-      ss << "unable to parse datetime '" << when_str << "'";
+    string from_str, to_str;
+    cmd_getval(g_ceph_context, cmdctx->cmdmap, "from", from_str);
+    cmd_getval(g_ceph_context, cmdctx->cmdmap, "to", to_str);
+    utime_t from, to;
+    if (!from.parse(from_str)) {
+      ss << "unable to parse datetime '" << from_str << "'";
+      r = -EINVAL;
+      cmdctx->reply(r, ss);
+    } else if (!to.parse(to_str)) {
+      ss << "unable to parse datetime '" << to_str << "'";
       r = -EINVAL;
       cmdctx->reply(r, ss);
     } else {
       map<string,string> meta;
-      daemon_state.with_device_create(devid, [when, &meta] (DeviceState& dev) {
-         dev.set_expected_failure(when, ceph_clock_now());
+      daemon_state.with_device_create(
+       devid,
+       [from, to, &meta] (DeviceState& dev) {
+         dev.set_life_expectancy(from, to, ceph_clock_now());
          meta = dev.metadata;
        });
       json_spirit::Object json_object;
@@ -1850,12 +1857,12 @@ bool DaemonServer::handle_command(MCommand *m)
       monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish);
     }
     return true;
-  } else if (prefix == "device rm-predicted-failure") {
+  } else if (prefix == "device rm-life-expectancy") {
     string devid;
     cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid);
     map<string,string> meta;
     if (daemon_state.with_device_write(devid, [&meta] (DeviceState& dev) {
-         dev.rm_expected_failure();
+         dev.rm_life_expectancy();
          meta = dev.metadata;
        })) {
       string cmd;
index 824e606fa363f0a9c78d868567e48bf527158d7a..1e1ebdb67ccfe2d441dee9cb96d0e76e0eebbc0b 100644 (file)
 void DeviceState::set_metadata(map<string,string>&& m)
 {
   metadata = std::move(m);
-  auto p = metadata.find("expected_failure");
+  auto p = metadata.find("life_expectancy_min");
   if (p != metadata.end()) {
-    expected_failure.parse(p->second);
+    life_expectancy.first.parse(p->second);
   }
-  p = metadata.find("expected_failure_stamp");
+  p = metadata.find("life_expectancy_max");
   if (p != metadata.end()) {
-    expected_failure_stamp.parse(p->second);
+    life_expectancy.second.parse(p->second);
   }
+  p = metadata.find("life_expectancy_stamp");
+  if (p != metadata.end()) {
+    life_expectancy_stamp.parse(p->second);
+  }
+}
+
+void DeviceState::set_life_expectancy(utime_t from, utime_t to, utime_t now)
+{
+  life_expectancy = make_pair(from, to);
+  life_expectancy_stamp = now;
+  metadata["life_expectancy_min"] = stringify(life_expectancy.first);
+  metadata["life_expectancy_max"] = stringify(life_expectancy.second);
+  metadata["life_expectancy_stamp"] = stringify(life_expectancy_stamp);
 }
 
-void DeviceState::set_expected_failure(utime_t when, utime_t now)
+void DeviceState::rm_life_expectancy()
 {
-  expected_failure = when;
-  expected_failure_stamp = now;
-  metadata["expected_failure"] = stringify(expected_failure);
-  metadata["expected_failure_stamp"] = stringify(expected_failure_stamp);
+  life_expectancy = make_pair(utime_t(), utime_t());
+  life_expectancy_stamp = utime_t();
+  metadata.erase("life_expectancy_min");
+  metadata.erase("life_expectancy_max");
+  metadata.erase("life_expectancy_stamp");
 }
 
-void DeviceState::rm_expected_failure()
+string DeviceState::get_life_expectancy_str() const
 {
-  expected_failure = utime_t();
-  expected_failure_stamp = utime_t();
-  metadata.erase("expected_failure");
-  metadata.erase("expected_failure_stamp");
+  if (life_expectancy.first == utime_t()) {
+    return string();
+  }
+  return stringify(life_expectancy.first) + " to " +
+    stringify(life_expectancy.second);
 }
 
 void DeviceState::dump(Formatter *f) const
@@ -67,10 +82,11 @@ void DeviceState::dump(Formatter *f) const
     f->dump_string("daemon", to_string(i));
   }
   f->close_section();
-  if (expected_failure != utime_t()) {
-    f->dump_stream("expected_failure") << expected_failure;
-    f->dump_stream("expected_failure_stamp")
-      << expected_failure_stamp;
+  if (life_expectancy.first != utime_t()) {
+    f->dump_stream("life_expectancy_min") << life_expectancy.first;
+    f->dump_stream("life_expectancy_max") << life_expectancy.second;
+    f->dump_stream("life_expectancy_stamp")
+      << life_expectancy_stamp;
   }
 }
 
@@ -85,9 +101,10 @@ void DeviceState::print(ostream& out) const
     d.insert(to_string(j));
   }
   out << "daemons " << d << "\n";
-  if (expected_failure != utime_t()) {
-    out << "expected_failure " << expected_failure
-       << " (as of " << expected_failure_stamp << ")\n";
+  if (life_expectancy.first != utime_t()) {
+    out << "life_expectancy " << life_expectancy.first << " to "
+       << life_expectancy.second
+       << " (as of " << life_expectancy_stamp << ")\n";
   }
 }
 
index 2ceedb926bb2d85d76096c538a2aa475f9e9576c..6f9f4f3d93d994706e42b9a87ea5b2c2bcef198a 100644 (file)
@@ -198,15 +198,17 @@ struct DeviceState : public RefCountedObject
 
   std::map<string,string> metadata;  ///< persistent metadata
 
-  utime_t expected_failure;       ///< when device failure is expected
-  utime_t expected_failure_stamp; ///< when expected_failure was recorded
+  pair<utime_t,utime_t> life_expectancy;  ///< when device failure is expected
+  utime_t life_expectancy_stamp;          ///< when life expectancy was recorded
 
   DeviceState(const std::string& n) : devid(n) {}
 
   void set_metadata(map<string,string>&& m);
 
-  void set_expected_failure(utime_t when, utime_t now);
-  void rm_expected_failure();
+  void set_life_expectancy(utime_t from, utime_t to, utime_t now);
+  void rm_life_expectancy();
+
+  string get_life_expectancy_str() const;
 
   /// true of we can be safely forgotten/removed from memory
   bool empty() const {
index c3870c681f99b4c5b4cc99ee1d92ea38ec15e97c..746ed02e52e53f16ba2699131eba95fcb2b55464 100644 (file)
@@ -153,9 +153,11 @@ COMMAND("device ls-by-daemon name=who,type=CephString",
 COMMAND("device ls-by-host name=host,type=CephString",
        "Show devices on a host",
        "mgr", "r", "cli,rest")
-COMMAND("device set-predicted-failure name=devid,type=CephString name=when,type=CephString",
-       "Set predicted device failure time",
+COMMAND("device set-life-expectancy name=devid,type=CephString "\
+       "name=from,type=CephString "\
+       "name=to,type=CephString",
+       "Set predicted device life expectancy",
        "mgr", "rw", "cli,rest")
-COMMAND("device rm-predicted-failure name=devid,type=CephString",
-       "Clear predicted device failure time",
+COMMAND("device rm-life-expectancy name=devid,type=CephString",
+       "Clear predicted device life expectancy",
        "mgr", "rw", "cli,rest")