git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: add `session ls` and `session evict` to asok 2101/head
author: John Spray <john.spray@redhat.com>
Tue, 24 Jun 2014 20:22:04 +0000 (13:22 -0700)
committer: John Spray <john.spray@redhat.com>
Mon, 14 Jul 2014 13:15:27 +0000 (14:15 +0100)
These commands are intended to help admins deal
with MDSs during recovery, to identify troublesome
clients which may need intervention (such as eviction).

Signed-off-by: John Spray <john.spray@redhat.com>
src/mds/MDS.cc
src/mds/Server.cc
src/mds/Server.h
src/mds/SessionMap.cc
src/mds/SessionMap.h

index 7aca856f4ce0976a146be1405c85246e058781f1..8ab9d1a924aed5faaa59f68cf7dc72fc4fdbf974 100644 (file)
@@ -210,6 +210,8 @@ public:
 bool MDS::asok_command(string command, cmdmap_t& cmdmap, string format,
                    ostream& ss)
 {
+  dout(1) << "asok_command: " << command << dendl;
+
   Formatter *f = new_formatter(format);
   if (!f)
     f = new_formatter("json-pretty");
@@ -224,6 +226,48 @@ bool MDS::asok_command(string command, cmdmap_t& cmdmap, string format,
     op_tracker.dump_ops_in_flight(f);
   } else if (command == "dump_historic_ops") {
     op_tracker.dump_historic_ops(f);
+  } else if (command == "session ls") {
+    mds_lock.Lock();
+
+    // Dump sessions, decorated with recovery/replay status
+    f->open_array_section("sessions");
+    const ceph::unordered_map<entity_name_t, Session*> session_map = sessionmap.get_sessions();
+    for (ceph::unordered_map<entity_name_t,Session*>::const_iterator p = session_map.begin();
+         p != session_map.end();
+         ++p)  {
+      if (!p->first.is_client()) {
+        continue;
+      }
+
+      f->open_object_section("session");
+      f->dump_int("id", p->first.num());
+      f->dump_string("state", p->second->get_state_name());
+      f->dump_int("replay_requests", is_clientreplay() ? p->second->get_request_count() : 0);
+      f->dump_bool("reconnecting", server->waiting_for_reconnect(p->first.num()));
+      f->dump_stream("inst") << p->second->info.inst;
+      f->close_section(); //session
+    }
+    f->close_section(); //sessions
+
+    mds_lock.Unlock();
+  } else if (command == "session evict") {
+    std::string client_id;
+    const bool got_arg = cmd_getval(g_ceph_context, cmdmap, "client_id", client_id);
+    assert(got_arg == true);
+
+    mds_lock.Lock();
+    Session *session = sessionmap.get_session(entity_name_t(CEPH_ENTITY_TYPE_CLIENT,
+                                                           strtol(client_id.c_str(), 0, 10)));
+    if (session) {
+      C_SaferCond on_safe;
+      server->kill_session(session, &on_safe);
+
+      mds_lock.Unlock();
+      on_safe.wait();
+    } else {
+      dout(15) << "session " << session << " not in sessionmap!" << dendl;
+      mds_lock.Unlock();
+    }
   }
   f->flush(ss);
   delete f;
@@ -246,6 +290,16 @@ void MDS::set_up_admin_socket()
                                     asok_hook,
                                     "show slowest recent ops");
   assert(0 == r);
+  r = admin_socket->register_command("session evict",
+                                    "session evict name=client_id,type=CephString",
+                                    asok_hook,
+                                    "Evict a CephFS client");
+  assert(0 == r);
+  r = admin_socket->register_command("session ls",
+                                    "session ls",
+                                    asok_hook,
+                                    "Enumerate connected CephFS clients");
+  assert(0 == r);
 }
 
 void MDS::clean_up_admin_socket()
@@ -859,7 +913,7 @@ void MDS::handle_command(MMonCommand *m)
     Session *session = sessionmap.get_session(entity_name_t(CEPH_ENTITY_TYPE_CLIENT,
                                                            strtol(m->cmd[2].c_str(), 0, 10)));
     if (session)
-      server->kill_session(session);
+      server->kill_session(session, NULL);
     else
       dout(15) << "session " << session << " not in sessionmap!" << dendl;
   } else if (m->cmd[0] == "issue_caps") {
index 896f0ee8cd156bb84e915ca878465590cf9ecbb9..742a8514a32ca53e88b5d239d201ab17d537706d 100644 (file)
@@ -148,14 +148,18 @@ class C_MDS_session_finish : public Context {
   version_t cmapv;
   interval_set<inodeno_t> inos;
   version_t inotablev;
+  Context *fin;
 public:
-  C_MDS_session_finish(MDS *m, Session *se, uint64_t sseq, bool s, version_t mv) :
-    mds(m), session(se), state_seq(sseq), open(s), cmapv(mv), inotablev(0) { }
-  C_MDS_session_finish(MDS *m, Session *se, uint64_t sseq, bool s, version_t mv, interval_set<inodeno_t>& i, version_t iv) :
-    mds(m), session(se), state_seq(sseq), open(s), cmapv(mv), inos(i), inotablev(iv) { }
+  C_MDS_session_finish(MDS *m, Session *se, uint64_t sseq, bool s, version_t mv, Context *fin_ = NULL) :
+    mds(m), session(se), state_seq(sseq), open(s), cmapv(mv), inotablev(0), fin(fin_) { }
+  C_MDS_session_finish(MDS *m, Session *se, uint64_t sseq, bool s, version_t mv, interval_set<inodeno_t>& i, version_t iv, Context *fin_ = NULL) :
+    mds(m), session(se), state_seq(sseq), open(s), cmapv(mv), inos(i), inotablev(iv), fin(fin_) { }
   void finish(int r) {
     assert(r == 0);
     mds->server->_session_logged(session, state_seq, open, cmapv, inos, inotablev);
+    if (fin) {
+      fin->complete(r);
+    }
   }
 };
 
@@ -252,7 +256,7 @@ void Server::handle_client_session(MClientSession *m)
                << ", BUGGY!" << dendl;
        assert(0);
       }
-      journal_close_session(session, Session::STATE_CLOSING);
+      journal_close_session(session, Session::STATE_CLOSING, NULL);
     }
     break;
 
@@ -323,6 +327,15 @@ void Server::_session_logged(Session *session, uint64_t state_seq, bool open, ve
       dout(20) << " killing client lease of " << *dn << dendl;
       dn->remove_client_lease(r, mds->locker);
     }
+    if (client_reconnect_gather.count(session->info.get_client())) {
+      dout(20) << " removing client from reconnect set" << dendl;
+      client_reconnect_gather.erase(session->info.get_client());
+
+      if (client_reconnect_gather.empty()) {
+        dout(7) << " client " << session->info.inst << " was last reconnect, finishing" << dendl;
+        reconnect_gather_finish();
+      }
+    }
     
     if (session->is_closing()) {
       // mark con disposable.  if there is a fault, we will get a
@@ -435,7 +448,7 @@ void Server::terminate_sessions()
        session->is_killing() ||
        session->is_closed())
       continue;
-    journal_close_session(session, Session::STATE_CLOSING);
+    journal_close_session(session, Session::STATE_CLOSING, NULL);
   }
 
   mdlog->wait_for_safe(new C_MDS_TerminatedSessions(this));
@@ -500,28 +513,31 @@ void Server::find_idle_sessions()
     mds->clog.info() << "closing stale session " << session->info.inst
        << " after " << age << "\n";
     dout(10) << "autoclosing stale session " << session->info.inst << " last " << session->last_cap_renew << dendl;
-    kill_session(session);
+    kill_session(session, NULL);
   }
 }
 
-void Server::kill_session(Session *session)
+void Server::kill_session(Session *session, Context *on_safe)
 {
   if ((session->is_opening() ||
        session->is_open() ||
        session->is_stale()) &&
       !session->is_importing()) {
     dout(10) << "kill_session " << session << dendl;
-    journal_close_session(session, Session::STATE_KILLING);
+    journal_close_session(session, Session::STATE_KILLING, on_safe);  // on_safe is handed on to journal_close_session
   } else {
     dout(10) << "kill_session importing or already closing/killing " << session << dendl;
     assert(session->is_closing() || 
           session->is_closed() || 
           session->is_killing() ||
           session->is_importing());
+    if (on_safe) {  // journal_close_session is not called on this path, so complete on_safe right here
+      on_safe->complete(0);
+    }
   }
 }
 
-void Server::journal_close_session(Session *session, int state)
+void Server::journal_close_session(Session *session, int state, Context *on_safe)
 {
   uint64_t sseq = mds->sessionmap.set_state(session, state);
   version_t pv = ++mds->sessionmap.projected;
@@ -540,7 +556,7 @@ void Server::journal_close_session(Session *session, int state)
     piv = 0;
 
   mdlog->start_submit_entry(new ESession(session->info.inst, false, pv, both, piv),
-                           new C_MDS_session_finish(mds, session, sseq, false, pv, both, piv));
+                           new C_MDS_session_finish(mds, session, sseq, false, pv, both, piv, on_safe));
   mdlog->flush();
 
   // clean up requests, too
@@ -703,7 +719,7 @@ void Server::reconnect_tick()
       Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(p->v));
       assert(session);
       dout(1) << "reconnect gave up on " << session->info.inst << dendl;
-      kill_session(session);
+      kill_session(session, NULL);
       failed_reconnects++;
     }
     client_reconnect_gather.clear();
@@ -7573,3 +7589,12 @@ void Server::_rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid)
 }
 
 
+/**
+ * Tell whether client c is still present in client_reconnect_gather,
+ * the set of clients a reconnect message is still expected from.
+ */
+bool Server::waiting_for_reconnect(client_t c) const
+{
+  return client_reconnect_gather.find(c) != client_reconnect_gather.end();
+}
+
index 94d401149bca76a8de1d1737fc4c1aa842ca99d5..423089189585379da5e1056dea97c4d21e85a8a0 100644 (file)
@@ -73,6 +73,7 @@ public:
   // -- sessions and recovery --
   utime_t  reconnect_start;
   set<client_t> client_reconnect_gather;  // clients i need a reconnect msg from.
+  bool waiting_for_reconnect(client_t c) const;
 
   Session *get_session(Message *m);
   void handle_client_session(class MClientSession *m);
@@ -87,8 +88,8 @@ public:
   void finish_flush_session(Session *session, version_t seq);
   void terminate_sessions();
   void find_idle_sessions();
-  void kill_session(Session *session);
-  void journal_close_session(Session *session, int state);
+  void kill_session(Session *session, Context *on_safe);
+  void journal_close_session(Session *session, int state, Context *on_safe);
   void reconnect_clients();
   void handle_client_reconnect(class MClientReconnect *m);
   //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo);
index 1b270290eaf26ff9351ac305425a80a08f6d79d9..8b797f763c97640f3b440f50e1578b9ad3e75aef 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "MDS.h"
 #include "MDCache.h"
+#include "Mutation.h"
 #include "SessionMap.h"
 #include "osdc/Filer.h"
 
@@ -228,6 +229,7 @@ void SessionMap::dump(Formatter *f) const
     f->open_object_section("entity name");
     p->first.dump(f);
     f->close_section(); // entity name
+    f->dump_string("state", p->second->get_state_name());
     f->open_object_section("Session info");
     p->second->info.dump(f);
     f->close_section(); // Session info
@@ -267,3 +269,24 @@ void SessionMap::wipe_ino_prealloc()
   }
   projected = ++version;
 }
+
+/**
+ * Count the entries of the `requests` member list by walking it,
+ * because elist does not have a size() method.
+ *
+ * O(N) runtime.  This would be const, but elist doesn't
+ * have const iterators.
+ */
+size_t Session::get_request_count()
+{
+  size_t result = 0;
+
+  elist<MDRequestImpl*>::iterator p = requests.begin(
+      member_offset(MDRequestImpl, item_session_request));
+  for (; !p.end(); ++p) {  // p must advance every pass, or a non-empty list would never terminate
+    ++result;
+  }
+
+  return result;
+}
+
index ac7fd46d46a9a5bf4a75e0d48ea59d4c873f678f..fefda91fdd22f415a78f5201b38749226af86d44 100644 (file)
@@ -89,6 +89,7 @@ public:
   list<Message*> preopen_out_queue;  ///< messages for client, queued before they connect
 
   elist<MDRequestImpl*> requests;
+  size_t get_request_count();
 
   interval_set<inodeno_t> pending_prealloc_inos; // journaling prealloc, will be added to prealloc_inos
 
@@ -253,6 +254,10 @@ public:
     
   // sessions
   bool empty() { return session_map.empty(); }
+  const ceph::unordered_map<entity_name_t, Session*> &get_sessions() const
+  {
+    return session_map;  // const reference to the member itself; no copy is made here
+  }
 
   bool is_any_state(int state) {
     map<int,xlist<Session*>* >::iterator p = by_state.find(state);