git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: implement 'device {set,rm}-predicted-failure' commands
author Sage Weil <sage@redhat.com>
Tue, 5 Jun 2018 19:03:42 +0000 (14:03 -0500)
committer Sage Weil <sage@redhat.com>
Mon, 11 Jun 2018 12:29:03 +0000 (07:29 -0500)
Record predicted device failure time.

Signed-off-by: Sage Weil <sage@redhat.com>
src/mgr/DaemonServer.cc
src/mgr/DaemonState.cc
src/mgr/DaemonState.h
src/mgr/MgrCommands.h

index 111c7883d10d38280e8f82073c8d3fff75548bf3..05c30aff30834328e99b6e9c4792a020a64e5966 100644 (file)
@@ -1730,7 +1730,8 @@ bool DaemonServer::handle_command(MCommand *m)
     cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid);
     int r = 0;
     ostringstream rs;
-    if (!daemon_state.with_device(devid, [&f, &rs] (const DeviceState& dev) {
+    if (!daemon_state.with_device(devid,
+                                 [&f, &rs] (const DeviceState& dev) {
          if (f) {
            f->open_object_section("device");
            f->dump_string("devid", dev.devid);
@@ -1740,11 +1741,24 @@ bool DaemonServer::handle_command(MCommand *m)
              f->dump_string("daemon", to_string(i));
            }
            f->close_section();
+           if (dev.expected_failure != utime_t()) {
+             f->dump_stream("expected_failure") << dev.expected_failure;
+             f->dump_stream("expected_failure_stamp")
+               << dev.expected_failure_stamp;
+           }
            f->close_section();
          } else {
            rs << "device " << dev.devid << "\n";
            rs << "host " << dev.server << "\n";
-           rs << "daemons " << dev.daemons << "\n";
+           set<string> d;
+           for (auto& j : dev.daemons) {
+             d.insert(to_string(j));
+           }
+           rs << "daemons " << d << "\n";
+           if (dev.expected_failure != utime_t()) {
+             rs << "expected_failure " << dev.expected_failure
+                << " (as of " << dev.expected_failure_stamp << ")\n";
+           }
          }
        })) {
       ss << "device " << devid << " not found";
@@ -1758,6 +1772,71 @@ bool DaemonServer::handle_command(MCommand *m)
     }
     cmdctx->reply(r, ss);
     return true;
+  } else if (prefix == "device set-predicted-failure") {
+    string devid;
+    cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid);
+    string when_str;
+    cmd_getval(g_ceph_context, cmdctx->cmdmap, "when", when_str);
+    utime_t when;
+    if (!when.parse(when_str)) {
+      ss << "unable to parse datetime '" << when_str << "'";
+      r = -EINVAL;
+      cmdctx->reply(r, ss);
+    } else {
+      map<string,string> meta;
+      daemon_state.with_device_create(devid, [when, &meta] (DeviceState& dev) {
+         dev.set_expected_failure(when, ceph_clock_now());
+         meta = dev.metadata;
+       });
+      json_spirit::Object json_object;
+      for (auto& i : meta) {
+       json_spirit::Config::add(json_object, i.first, i.second);
+      }
+      bufferlist json;
+      json.append(json_spirit::write(json_object));
+      const string cmd =
+       "{"
+       "\"prefix\": \"config-key set\", "
+       "\"key\": \"device/" + devid + "\""
+       "}";
+      auto on_finish = new ReplyOnFinish(cmdctx);
+      monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish);
+    }
+    return true;
+  } else if (prefix == "device rm-predicted-failure") {
+    string devid;
+    cmd_getval(g_ceph_context, cmdctx->cmdmap, "devid", devid);
+    map<string,string> meta;
+    if (daemon_state.with_device_write(devid, [&meta] (DeviceState& dev) {
+         dev.rm_expected_failure();
+         meta = dev.metadata;
+       })) {
+      string cmd;
+      bufferlist json;
+      if (meta.empty()) {
+       cmd =
+         "{"
+         "\"prefix\": \"config-key rm\", "
+         "\"key\": \"device/" + devid + "\""
+         "}";
+      } else {
+       json_spirit::Object json_object;
+       for (auto& i : meta) {
+         json_spirit::Config::add(json_object, i.first, i.second);
+       }
+       json.append(json_spirit::write(json_object));
+       cmd =
+         "{"
+         "\"prefix\": \"config-key set\", "
+         "\"key\": \"device/" + devid + "\""
+         "}";
+      }
+      auto on_finish = new ReplyOnFinish(cmdctx);
+      monc->start_mon_command({cmd}, json, nullptr, nullptr, on_finish);
+    } else {
+      cmdctx->reply(0, ss);
+    }
+    return true;
   } else {
     // fall back to feeding command to PGMap
     r = cluster_state.with_pgmap([&](const PGMap& pg_map) {
index e8daf7dfd0e663eb5e5b89cc6d03c50f4b2aef57..60eefb7dfe70a0d5c0c58b0bf9dc72a9d6960216 100644 (file)
 #include "DaemonState.h"
 
 #include "MgrSession.h"
+#include "include/stringify.h"
 
 #define dout_context g_ceph_context
 #define dout_subsys ceph_subsys_mgr
 #undef dout_prefix
 #define dout_prefix *_dout << "mgr " << __func__ << " "
 
+/// Replace the metadata map and refresh the cached failure-prediction
+/// fields (expected_failure, expected_failure_stamp) derived from it.
+///
+/// @param m  new metadata; moved into this object
+void DeviceState::set_metadata(map<string,string>&& m)
+{
+  metadata = std::move(m);
+  // Start from the "no prediction" state so values cached from an earlier
+  // metadata map do not survive when the new map lacks the keys.
+  expected_failure = utime_t();
+  expected_failure_stamp = utime_t();
+  auto p = metadata.find("expected_failure");
+  if (p != metadata.end() && !expected_failure.parse(p->second)) {
+    // unparseable value: treat as unset rather than keep a partial result
+    expected_failure = utime_t();
+  }
+  p = metadata.find("expected_failure_stamp");
+  if (p != metadata.end() && !expected_failure_stamp.parse(p->second)) {
+    expected_failure_stamp = utime_t();
+  }
+}
+
+/// Record a predicted failure time together with the time the prediction
+/// was made, mirroring both into the metadata map as strings.
+///
+/// @param when  time at which the device is expected to fail
+/// @param now   time at which this prediction is being recorded
+void DeviceState::set_expected_failure(utime_t when, utime_t now)
+{
+  metadata["expected_failure"] = stringify(when);
+  metadata["expected_failure_stamp"] = stringify(now);
+  expected_failure = when;
+  expected_failure_stamp = now;
+}
+
+/// Forget any recorded failure prediction: drop both metadata entries and
+/// reset the cached timestamps to a default-constructed utime_t.
+void DeviceState::rm_expected_failure()
+{
+  for (const char *key : {"expected_failure", "expected_failure_stamp"}) {
+    metadata.erase(key);
+  }
+  expected_failure_stamp = utime_t();
+  expected_failure = utime_t();
+}
+
 void DaemonStateIndex::insert(DaemonStatePtr dm)
 {
   RWLock::WLocker l(lock);
index 654c12beb7e8584e62340e12ad0a83b7e812181e..27190b95941379a70a8e2ebf404b8f98be538b02 100644 (file)
@@ -193,10 +193,16 @@ struct DeviceState : public RefCountedObject
 
   std::map<string,string> metadata;  ///< persistent metadata
 
+  utime_t expected_failure;       ///< when device failure is expected
+  utime_t expected_failure_stamp; ///< when expected_failure was recorded
+
   DeviceState(const std::string& n) : devid(n) {}
 
   void set_metadata(map<string,string>&& m);
 
+  void set_expected_failure(utime_t when, utime_t now);
+  void rm_expected_failure();
+
   /// true of we can be safely forgotten/removed from memory
   bool empty() const {
     return daemons.empty() && metadata.empty();
@@ -263,7 +269,7 @@ public:
   }
 
   template<typename Callback, typename...Args>
-  auto with_device(const std::string& dev,
+  bool with_device(const std::string& dev,
                   Callback&& cb, Args&&... args) const {
     RWLock::RLocker l(lock);
     auto p = devices.find(dev);
@@ -274,6 +280,29 @@ public:
     return true;
   }
 
+  /// Run cb on an existing device while holding the write lock; false if unknown.
+  template<typename Callback, typename...Args>
+  bool with_device_write(const std::string& dev,
+                        Callback&& cb, Args&&... args) {
+    RWLock::WLocker l(lock);
+    auto found = devices.find(dev);
+    if (found != devices.end()) {
+      std::forward<Callback>(cb)(*found->second, std::forward<Args>(args)...);
+      // the callback may have left the device empty(); if so, erase it
+      if (found->second->empty()) { _erase_device(found->second); }
+      return true;
+    }
+    return false;
+  }
+
+  /// Run cb on the device from _get_or_create_device(dev), holding the write lock.
+  template<typename Callback, typename...Args>
+  void with_device_create(const std::string& dev,
+                         Callback&& cb, Args&&... args) {
+    RWLock::WLocker l(lock);
+    std::forward<Callback>(cb)(*_get_or_create_device(dev), std::forward<Args>(args)...);
+  }
+
   template<typename Callback, typename...Args>
   void with_devices(Callback&& cb, Args&&... args) const {
     RWLock::RLocker l(lock);
index 4415aff00d2ad73a3733356b0d4704301ad18a74..c3870c681f99b4c5b4cc99ee1d92ea38ec15e97c 100644 (file)
@@ -153,3 +153,9 @@ COMMAND("device ls-by-daemon name=who,type=CephString",
 COMMAND("device ls-by-host name=host,type=CephString",
        "Show devices on a host",
        "mgr", "r", "cli,rest")
+COMMAND("device set-predicted-failure name=devid,type=CephString name=when,type=CephString",
+       "Set predicted device failure time",
+       "mgr", "rw", "cli,rest")
+COMMAND("device rm-predicted-failure name=devid,type=CephString",
+       "Clear predicted device failure time",
+       "mgr", "rw", "cli,rest")