git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: add `osd destroy`
author Joao Eduardo Luis <joao@suse.de>
Wed, 31 May 2017 08:55:36 +0000 (09:55 +0100)
committer Joao Eduardo Luis <joao@suse.de>
Mon, 5 Jun 2017 14:21:58 +0000 (15:21 +0100)
This new command will remove a given osd's auth keys, along with any
daemon-private and dm-crypt info contained in the config-key store,
without actually removing the osd from crush.

Instead of removing the osd from the osdmap, we mark it as `destroyed`,
thus preventing competing, racing commands from recreating the osd.

Signed-off-by: Joao Eduardo Luis <joao@suse.de>
src/common/ceph_strings.cc
src/include/rados.h
src/mon/AuthMonitor.cc
src/mon/AuthMonitor.h
src/mon/ConfigKeyService.cc
src/mon/ConfigKeyService.h
src/mon/MonCommands.h
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/osd/OSDMap.h

index 422ccace661296443163269b80bf82fcd7c0a454..ef3aa8027619adb4ae4eee07ab0b26a990686200 100644 (file)
@@ -44,9 +44,11 @@ const char *ceph_osd_state_name(int s)
                return "nearfull";
        case CEPH_OSD_BACKFILLFULL:
                return "backfillfull";
+        case CEPH_OSD_DESTROYED:
+                return "destroyed";
        default:
                return "???";
-       }       
+       }
 }
 
 const char *ceph_release_name(int r)
index 426ba45308d2b2f0fbc5e1afac7e07ef88fc04a3..b2fa55b18e1d9d28f4c2a14e7c76690bc4395bc6 100644 (file)
@@ -117,6 +117,7 @@ struct ceph_eversion {
 #define CEPH_OSD_FULL    (1<<4)  /* osd is at or above full threshold */
 #define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */
 #define CEPH_OSD_BACKFILLFULL (1<<6) /* osd is at or above backfillfull threshold */
+#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */
 
 extern const char *ceph_osd_state_name(int s);
 
index 48f334a0064495425a8b7558d69ace13697cd459..5616c35104524f274b687b70a5454eaa06f4471c 100644 (file)
@@ -17,6 +17,7 @@
 #include "mon/AuthMonitor.h"
 #include "mon/Monitor.h"
 #include "mon/MonitorDBStore.h"
+#include "mon/ConfigKeyService.h"
 
 #include "messages/MMonCommand.h"
 #include "messages/MAuth.h"
@@ -26,6 +27,7 @@
 
 #include "auth/AuthServiceHandler.h"
 #include "auth/KeyRing.h"
+#include "include/stringify.h"
 #include "include/assert.h"
 
 #define dout_subsys ceph_subsys_mon
@@ -667,6 +669,88 @@ int AuthMonitor::import_keyring(KeyRing& keyring)
   return 0;
 }
 
+/**
+ * Push an incremental that deletes @p entity's key.
+ *
+ * @param entity  name of the auth entity to delete
+ * @returns 0 once an AUTH_INC_DEL incremental has been handed to
+ *          push_cephx_inc(); -ENOENT if the key server does not contain
+ *          the entity (nothing is pushed in that case)
+ */
+int AuthMonitor::remove_entity(const EntityName &entity)
+{
+  dout(10) << __func__ << " " << entity << dendl;
+
+  const bool known = mon->key_server.contains(entity);
+  if (!known) {
+    return -ENOENT;
+  }
+
+  KeyServerData::Incremental removal;
+  removal.op = KeyServerData::AUTH_INC_DEL;
+  removal.name = entity;
+  push_cephx_inc(removal);
+  return 0;
+}
+
+/**
+ * Build the auth entity names tied to an osd that is about to be destroyed.
+ *
+ * Must be called with paxos plugged (asserted).
+ *
+ * @param id              osd id; yields the cephx entity "osd.<id>"
+ * @param uuid            osd uuid; yields "client.osd-lockbox.<uuid>"
+ * @param cephx_entity    [out] parsed from the cephx entity string
+ * @param lockbox_entity  [out] parsed from the lockbox entity string
+ * @param ss              receives an error message when parsing fails
+ * @returns 0 on success, -EINVAL if either string is rejected by
+ *          EntityName::from_str()
+ */
+int AuthMonitor::validate_osd_destroy(
+    int32_t id,
+    const uuid_d& uuid,
+    EntityName& cephx_entity,
+    EntityName& lockbox_entity,
+    stringstream& ss)
+{
+  assert(paxos->is_plugged());
+
+  dout(10) << __func__ << " id " << id << " uuid " << uuid << dendl;
+
+  const string cephx_name = "osd." + stringify(id);
+  const string lockbox_name = "client.osd-lockbox." + stringify(uuid);
+
+  // the lockbox name is only parsed once the cephx name has been accepted
+  const bool cephx_ok = cephx_entity.from_str(cephx_name);
+  if (!cephx_ok) {
+    dout(10) << __func__ << " invalid cephx entity '"
+             << cephx_name << "'" << dendl;
+    ss << "invalid cephx key entity '" << cephx_name << "'";
+    return -EINVAL;
+  }
+
+  const bool lockbox_ok = lockbox_entity.from_str(lockbox_name);
+  if (!lockbox_ok) {
+    dout(10) << __func__ << " invalid lockbox entity '"
+             << lockbox_name << "'" << dendl;
+    ss << "invalid lockbox key entity '" << lockbox_name << "'";
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+/**
+ * Remove the cephx and lockbox auth entities of a destroyed osd.
+ *
+ * Must be called with paxos plugged (asserted). Each entity is passed to
+ * remove_entity(); one that does not exist is only logged. If neither
+ * entity existed this is a no-op, otherwise propose_pending() is called.
+ *
+ * @param cephx_entity    the osd's cephx entity
+ * @param lockbox_entity  the osd's lockbox entity
+ * @returns always 0
+ */
+int AuthMonitor::do_osd_destroy(
+    const EntityName& cephx_entity,
+    const EntityName& lockbox_entity)
+{
+  assert(paxos->is_plugged());
+
+  dout(10) << __func__ << " cephx " << cephx_entity
+                       << " lockbox " << lockbox_entity << dendl;
+
+  // cephx first, then lockbox
+  bool any_removed = false;
+  for (const EntityName *entity : { &cephx_entity, &lockbox_entity }) {
+    if (remove_entity(*entity) == -ENOENT) {
+      dout(10) << __func__ << " " << *entity << " does not exist" << dendl;
+      continue;
+    }
+    any_removed = true;
+  }
+
+  if (!any_removed) {
+    dout(10) << __func__ << " entities do not exist -- no-op." << dendl;
+    return 0;
+  }
+
+  // With paxos plugged this does not trigger a proposal, but it is still
+  // required so that our pending state gets encoded into the paxos'
+  // pending transaction.
+  propose_pending();
+  return 0;
+}
+
 bool AuthMonitor::prepare_command(MonOpRequestRef op)
 {
   MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
index edeb12a3e1bbb52dc709bc2bffee8bf837953dc6..2c06858bd3f7ecf107119f1bb98a5c0ad2879d20 100644 (file)
@@ -158,6 +158,9 @@ private:
   bool prepare_command(MonOpRequestRef op);
 
   bool check_rotate();
+
+  int remove_entity(const EntityName &entity);
+
  public:
   AuthMonitor(Monitor *mn, Paxos *p, const string& service_name)
     : PaxosService(mn, p, service_name),
@@ -167,9 +170,19 @@ private:
   {}
 
   void pre_auth(MAuth *m);
-  
+
   void tick() override;  // check state, take actions
 
+  int validate_osd_destroy(
+      int32_t id,
+      const uuid_d& uuid,
+      EntityName& cephx_entity,
+      EntityName& lockbox_entity,
+      stringstream& ss);
+  int do_osd_destroy(
+      const EntityName& cephx_entity,
+      const EntityName& lockbox_entity);
+
   void dump_info(Formatter *f);
 };
 
index f685daa05a66a2b2d06c9bd3c4e2bc33a60260e5..7b79943a6562b4e3e0e5eb4011034d6101d1f1a6 100644 (file)
@@ -20,6 +20,7 @@
 #include "mon/ConfigKeyService.h"
 #include "mon/MonitorDBStore.h"
 #include "common/errno.h"
+#include "include/stringify.h"
 
 #define dout_subsys ceph_subsys_mon
 #undef dout_prefix
@@ -55,12 +56,19 @@ void ConfigKeyService::store_put(const string &key, bufferlist &bl, Context *cb)
 void ConfigKeyService::store_delete(const string &key, Context *cb)
 {
   MonitorDBStore::TransactionRef t = paxos->get_pending_transaction();
-  t->erase(STORE_PREFIX, key);
+  store_delete(t, key);
   if (cb)
     paxos->queue_pending_finisher(cb);
   paxos->trigger_propose();
 }
 
+void ConfigKeyService::store_delete(
+    MonitorDBStore::TransactionRef t,  // transaction the erase is added to
+    const string &key)                 // key, under STORE_PREFIX, to erase
+{
+  t->erase(STORE_PREFIX, key);  // only adds the erase to t; nothing is proposed in this overload
+}
+
 bool ConfigKeyService::store_exists(const string &key)
 {
   return mon->store->exists(STORE_PREFIX, key);
@@ -99,6 +107,24 @@ void ConfigKeyService::store_dump(stringstream &ss)
   f.flush(ss);
 }
 
+/**
+ * Add to @p t the deletion of every config-key entry whose key starts
+ * with @p prefix.
+ *
+ * Walks the iterator returned by mon->store->get_iterator(STORE_PREFIX)
+ * and calls store_delete(t, key) for each matching key; nothing is
+ * proposed here.
+ *
+ * @param t       transaction the erase operations are added to
+ * @param prefix  leading string a key must have in order to be deleted
+ */
+void ConfigKeyService::store_delete_prefix(
+    MonitorDBStore::TransactionRef t,
+    const string &prefix)
+{
+  KeyValueDB::Iterator iter =
+    mon->store->get_iterator(STORE_PREFIX);
+
+  while (iter->valid()) {
+    string key(iter->key());
+
+    // Anchored comparison: unlike find(), which keeps searching the rest
+    // of the key when the prefix is not at offset 0, this looks at no more
+    // than prefix.size() characters.
+    if (key.compare(0, prefix.size(), prefix) == 0) {
+      store_delete(t, key);
+    }
+    iter->next();
+  }
+}
+
 bool ConfigKeyService::service_dispatch(MonOpRequestRef op)
 {
   Message *m = op->get_req();
@@ -208,6 +234,7 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op)
     store_dump(tmp_ss);
     rdata.append(tmp_ss);
     ret = 0;
+
   }
 
 out:
@@ -219,3 +246,17 @@ out:
   return (ret == 0);
 }
 
+/**
+ * Purge the config-key entries that belong to a destroyed osd.
+ *
+ * Two key prefixes are removed through store_delete_prefix() on the
+ * pending paxos transaction: "dm-crypt/osd/<uuid>/" and
+ * "daemon-private/osd.<id>/". paxos->trigger_propose() is called afterwards.
+ *
+ * @param id    osd id, used for the daemon-private prefix
+ * @param uuid  osd uuid, used for the dm-crypt prefix
+ */
+void ConfigKeyService::do_osd_destroy(int32_t id, uuid_d& uuid)
+{
+  string dmcrypt_prefix =
+    "dm-crypt/osd/" + stringify(uuid) + "/";
+  string daemon_prefix =
+    "daemon-private/osd." + stringify(id) + "/";
+
+  MonitorDBStore::TransactionRef t = paxos->get_pending_transaction();
+  // bind by const reference so each prefix string is not copied per iteration
+  for (const auto& p : { dmcrypt_prefix, daemon_prefix }) {
+    store_delete_prefix(t, p);
+  }
+
+  paxos->trigger_propose();
+}
index 34c70342a981cd25617dbeb90bfd03b209d34ccf..631264e8a67a16afc6dec5afe59e1ad1734c2e19 100644 (file)
@@ -15,6 +15,7 @@
 #define CEPH_MON_CONFIG_KEY_SERVICE_H
 
 #include "mon/QuorumService.h"
+#include "mon/MonitorDBStore.h"
 
 class Paxos;
 class Monitor;
@@ -28,7 +29,11 @@ class ConfigKeyService : public QuorumService
 
   int store_get(const string &key, bufferlist &bl);
   void store_put(const string &key, bufferlist &bl, Context *cb = NULL);
+  void store_delete(MonitorDBStore::TransactionRef t, const string &key);
   void store_delete(const string &key, Context *cb = NULL);
+  void store_delete_prefix(
+      MonitorDBStore::TransactionRef t,
+      const string &prefix);
   void store_list(stringstream &ss);
   void store_dump(stringstream &ss);
   bool store_exists(const string &key);
@@ -61,6 +66,8 @@ public:
   void cleanup() override { }
   void service_tick() override { }
 
+  void do_osd_destroy(int32_t id, uuid_d& uuid);
+
   int get_type() override {
     return QuorumService::SERVICE_CONFIG_KEY;
   }
index 31edf3e1657399c5bd654748ca318f53309300b7..70de88adc4b0317014980ea2b2901c8895cd4b66 100644 (file)
@@ -692,6 +692,13 @@ COMMAND("osd primary-affinity " \
        "type=CephFloat,name=weight,range=0.0|1.0", \
        "adjust osd primary-affinity from 0.0 <= <weight> <= 1.0", \
        "osd", "rw", "cli,rest")
+COMMAND("osd destroy " \
+        "name=id,type=CephOsdName " \
+        "name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
+        "mark osd as being destroyed. Keeps the ID intact (allowing reuse), " \
+        "but removes cephx keys, config-key data and lockbox keys, "\
+        "rendering data permanently unreadable.", \
+        "osd", "rw", "cli,rest")
 COMMAND("osd lost " \
        "name=id,type=CephOsdName " \
        "name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
index 9d92130aeca1a75308ef006d28a2114389e9145f..892ae51db08949e0c257b96b1a404a778db5d84b 100644 (file)
@@ -24,6 +24,8 @@
 #include "MDSMonitor.h"
 #include "PGMonitor.h"
 #include "MgrStatMonitor.h"
+#include "mon/AuthMonitor.h"
+#include "mon/ConfigKeyService.h"
 
 #include "MonitorDBStore.h"
 #include "Session.h"
@@ -6430,6 +6432,40 @@ static int parse_reweights(CephContext *cct,
   return 0;
 }
 
+/**
+ * Purge an osd's auth and config-key data and flag it as destroyed.
+ *
+ * Must be called with paxos plugged (asserted). In order: the auth
+ * entities are validated and removed through the AuthMonitor, the
+ * config-key entries are removed through the ConfigKeyService, and the
+ * pending incremental records CEPH_OSD_DESTROYED in new_state together
+ * with a default-constructed uuid in new_uuid for this osd.
+ *
+ * @param id  osd to destroy
+ * @param ss  receives "destroyed osd.<id>" when the osd is already
+ *            destroyed, or the validation error message
+ * @returns 0 on success (including the already-destroyed case), or the
+ *          negative error returned by validate_osd_destroy()
+ */
+int OSDMonitor::prepare_command_osd_destroy(
+    int32_t id,
+    stringstream& ss)
+{
+  assert(paxos->is_plugged());
+
+  uuid_d uuid = osdmap.get_uuid(id);
+  dout(10) << __func__ << " destroying osd." << id
+           << " uuid " << uuid << dendl;
+
+  // nothing left to do for an osd that is already flagged
+  const bool already_destroyed = osdmap.is_destroyed(id);
+  if (already_destroyed) {
+    ss << "destroyed osd." << id;
+    return 0;
+  }
+
+  EntityName cephx_entity;
+  EntityName lockbox_entity;
+
+  const int validated = mon->authmon()->validate_osd_destroy(
+      id, uuid, cephx_entity, lockbox_entity, ss);
+  if (validated < 0) {
+    return validated;
+  }
+
+  const int removed = mon->authmon()->do_osd_destroy(cephx_entity,
+                                                     lockbox_entity);
+  assert(removed == 0);
+
+  ((ConfigKeyService*)mon->config_key_service)->do_osd_destroy(id, uuid);
+
+  pending_inc.new_state[id] = CEPH_OSD_DESTROYED;
+  pending_inc.new_uuid[id] = uuid_d();
+  return 0;
+}
+
 bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
                                      map<string,cmd_vartype> &cmdmap)
 {
@@ -8245,6 +8281,72 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
       return true;
     }
 
+  } else if (prefix == "osd destroy") {
+    /* Destroying an OSD means that we don't expect to further make use of
+     * the OSD's data (which may even become unreadable after this operation),
+     * and that we are okay with scrubbing all its cephx keys and config-key
+     * data (which may include lockbox keys, thus rendering the osd's data
+     * unreadable).
+     *
+     * The OSD will not be removed. Instead, we will mark it as destroyed,
+     * such that a subsequent call to `create` will not reuse the osd id.
+     * This will play into being able to recreate the OSD, at the same
+     * crush location, with minimal data movement.
+     */
+
+    // make sure authmon is writeable.
+    if (!mon->authmon()->is_writeable()) {
+      dout(10) << __func__ << " waiting for auth mon to be writeable for "
+               << "osd destroy" << dendl;
+      mon->authmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
+      return false;
+    }
+
+    // the osd id is mandatory; reject the command if it cannot be extracted
+    int64_t id;
+    if (!cmd_getval(g_ceph_context, cmdmap, "id", id)) {
+      ss << "unable to parse osd id value '"
+         << cmd_vartype_stringify(cmdmap["id"]) << "'";
+      err = -EINVAL;
+      goto reply;
+    }
+
+    string sure;
+    if (!cmd_getval(g_ceph_context, cmdmap, "sure", sure) ||
+        sure != "--yes-i-really-mean-it") {
+      ss << "Are you SURE? This will mean real, permanent data loss, as well "
+         << "as cephx and lockbox keys. Pass --yes-i-really-mean-it if you "
+         << "really do.";
+      err = -EPERM;
+      goto reply;
+    } else if (!osdmap.exists(id)) {
+      ss << "osd." << id << " does not exist";
+      err = -ENOENT;
+      goto reply;
+    } else if (osdmap.is_up(id)) {
+      ss << "osd." << id << " is not `down`.";
+      err = -EBUSY;
+      goto reply;
+    } else if (osdmap.is_destroyed(id)) {
+      ss << "destroyed osd." << id;
+      err = 0;
+      goto reply;
+    }
+
+    paxos->plug();
+    err = prepare_command_osd_destroy(id, ss);
+    paxos->unplug();
+
+    if (err < 0) {
+      goto reply;
+    }
+
+    ss << "destroyed osd." << id;
+    getline(ss, rs);
+    wait_for_finished_proposal(op,
+        new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1));
+    force_immediate_propose();
+    return true;
+
   } else if (prefix == "osd create") {
     int i = -1;
 
index b87a5af6036d7a449ec628da2229608fa919ce8a..840cebb1462d9902d36e138cc4cc7f588ca27f16 100644 (file)
@@ -476,6 +476,7 @@ public:
   bool prepare_command(MonOpRequestRef op);
   bool prepare_command_impl(MonOpRequestRef op, map<string,cmd_vartype>& cmdmap);
 
+  int prepare_command_osd_destroy(int32_t id, stringstream& ss);
   int prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
                                stringstream& ss);
 
index 96a360d034971ac3f035baed45c97a95bba9d59a..a9005dddfd34f55c48b0d6aaa621a45c29de522f 100644 (file)
@@ -721,6 +721,10 @@ public:
     return osd >= 0 && osd < max_osd && (osd_state[osd] & CEPH_OSD_EXISTS);
   }
 
+  /// @returns true iff @p osd exists and has CEPH_OSD_DESTROYED set in its state
+  bool is_destroyed(int osd) const {
+    if (!exists(osd)) {
+      return false;
+    }
+    return (osd_state[osd] & CEPH_OSD_DESTROYED) != 0;
+  }
+
   bool is_up(int osd) const {
     return exists(osd) && (osd_state[osd] & CEPH_OSD_UP);
   }