]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
MDS: add an OpTracker and use it
authorGreg Farnum <greg@inktank.com>
Fri, 9 May 2014 22:30:37 +0000 (15:30 -0700)
committerGreg Farnum <greg@inktank.com>
Mon, 12 May 2014 21:44:04 +0000 (14:44 -0700)
Signed-off-by: Greg Farnum <greg@inktank.com>
src/common/config_opts.h
src/mds/MDCache.cc
src/mds/MDS.cc
src/mds/MDS.h
src/mds/Makefile.am
src/mds/Mutation.cc
src/mds/Mutation.h

index a065a772f9b13021ca87ba64fa99e95ac940ca82..de31f3468a3353a7dcefdc2f1e758715747cc3af 100644 (file)
@@ -382,6 +382,11 @@ OPTION(max_mds, OPT_INT, 1)
 OPTION(mds_standby_for_name, OPT_STR, "")
 OPTION(mds_standby_for_rank, OPT_INT, -1)
 OPTION(mds_standby_replay, OPT_BOOL, false)
+OPTION(mds_enable_op_tracker, OPT_BOOL, true) // enable/disable MDS op tracking
+OPTION(mds_op_history_size, OPT_U32, 20)    // Max number of completed ops to track
+OPTION(mds_op_history_duration, OPT_U32, 600) // Oldest completed op to track
+OPTION(mds_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
+OPTION(mds_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go
 
 // If true, compact leveldb store on mount
 OPTION(osd_compact_leveldb_on_mount, OPT_BOOL, false)
index 090cd44c9aa481a0b72e20fd4ad6ce0db132e6b0..3a83cd7da1597db74645924a910af934de9dbc64 100644 (file)
@@ -8870,8 +8870,13 @@ MDRequestRef MDCache::request_start(MClientRequest *req)
   params.reqid = req->get_reqid();
   params.attempt = req->get_num_fwd();
   params.client_req = req;
+  params.initiated = req->get_recv_stamp();
+  params.throttled = req->get_throttle_stamp();
+  params.all_read = req->get_recv_complete_stamp();
+  params.dispatched = req->get_dispatch_stamp();
 
-  MDRequestRef mdr(new MDRequestImpl(params));
+  MDRequestRef mdr =
+      mds->op_tracker.create_request<MDRequestImpl,MDRequestImpl::Params>(params);
   active_requests[params.reqid] = mdr;
   dout(7) << "request_start " << *mdr << dendl;
   return mdr;
@@ -8885,7 +8890,12 @@ MDRequestRef MDCache::request_start_slave(metareqid_t ri, __u32 attempt, Message
   params.attempt = attempt;
   params.triggering_slave_req = m;
   params.slave_to = by;
-  MDRequestRef mdr(new MDRequestImpl(params));
+  params.initiated = m->get_recv_stamp();
+  params.throttled = m->get_throttle_stamp();
+  params.all_read = m->get_recv_complete_stamp();
+  params.dispatched = m->get_dispatch_stamp();
+  MDRequestRef mdr =
+      mds->op_tracker.create_request<MDRequestImpl,MDRequestImpl::Params>(params);
   assert(active_requests.count(mdr->reqid) == 0);
   active_requests[mdr->reqid] = mdr;
   dout(7) << "request_start_slave " << *mdr << " by mds." << by << dendl;
@@ -8897,6 +8907,8 @@ MDRequestRef MDCache::request_start_internal(int op)
   MDRequestImpl::Params params;
   params.reqid.name = entity_name_t::MDS(mds->get_nodeid());
   params.reqid.tid = mds->issue_tid();
+  params.initiated = ceph_clock_now(g_ceph_context);
+  params.internal_op = op;
   MDRequestRef mdr =
       mds->op_tracker.create_request<MDRequestImpl,MDRequestImpl::Params>(params);
 
index 7fd1bf29c53ea0ddbd9f454e0d9c2e3ff19f08f6..47010c484851251937a6712c6cf83ac2e7304cba 100644 (file)
@@ -105,6 +105,7 @@ MDS::MDS(const std::string &n, Messenger *m, MonClient *mc) :
   messenger(m),
   monc(mc),
   clog(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS),
+  op_tracker(cct, m->cct->_conf->mds_enable_op_tracker),
   sessionmap(this), asok_hook(NULL) {
 
   orig_argc = 0;
@@ -155,6 +156,10 @@ MDS::MDS(const std::string &n, Messenger *m, MonClient *mc) :
 
   logger = 0;
   mlogger = 0;
+  op_tracker.set_complaint_and_threshold(m->cct->_conf->mds_op_complaint_time,
+                                         m->cct->_conf->mds_op_log_threshold);
+  op_tracker.set_history_size_and_duration(m->cct->_conf->mds_op_history_size,
+                                           m->cct->_conf->mds_op_history_duration);
 }
 
 MDS::~MDS() {
@@ -221,6 +226,10 @@ bool MDS::asok_command(string command, cmdmap_t& cmdmap, string format,
     f->dump_string("state", ceph_mds_state_name(get_state()));
     f->dump_unsigned("mdsmap_epoch", mdsmap->get_epoch());
     f->close_section(); // status
+  } else if (command == "dump_ops_in_flight") {
+    op_tracker.dump_ops_in_flight(f);
+  } else if (command == "dump_historic_ops") {
+    op_tracker.dump_historic_ops(f);
   }
   f->flush(ss);
   delete f;
@@ -232,8 +241,42 @@ void MDS::set_up_admin_socket()
   int r;
   AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
   asok_hook = new MDSSocketHook(this);
-  r = admin_socket->register_command("status", "status", asok_hook, "high-level status of MDS");
+  r = admin_socket->register_command("status", "status", asok_hook,
+                                    "high-level status of MDS");
   assert(0 == r);
+  r = admin_socket->register_command("dump_ops_in_flight",
+                                    "dump_ops_in_flight", asok_hook,
+                                    "show the ops currently in flight");
+  assert(0 == r);
+  r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
+                                    asok_hook,
+                                    "show slowest recent ops");
+  assert(0 == r);
+}
+
+const char** MDS::get_tracked_conf_keys() const
+{
+  static const char* KEYS[] = {
+    "mds_op_complaint_time", "mds_op_log_threshold",
+    "mds_op_history_size", "mds_op_history_duration",
+    NULL
+  };
+  return KEYS;
+}
+
+void MDS::handle_conf_change(const struct md_config_t *conf,
+                            const std::set <std::string> &changed)
+{
+  if (changed.count("mds_op_complaint_time") ||
+      changed.count("mds_op_log_threshold")) {
+    op_tracker.set_complaint_and_threshold(conf->mds_op_complaint_time,
+                                           conf->mds_op_log_threshold);
+  }
+  if (changed.count("mds_op_history_size") ||
+      changed.count("mds_op_history_duration")) {
+    op_tracker.set_history_size_and_duration(conf->mds_op_history_size,
+                                             conf->mds_op_history_duration);
+  }
 }
 
 void MDS::create_logger()
@@ -592,6 +635,7 @@ int MDS::init(int wanted_state)
 
   create_logger();
   set_up_admin_socket();
+  g_conf->add_observer(this);
 
   mds_lock.Unlock();
 
@@ -660,9 +704,22 @@ void MDS::tick()
     if (snapserver)
       snapserver->check_osd_map(false);
   }
-}
 
+  check_ops_in_flight();
+}
 
+void MDS::check_ops_in_flight()
+{
+  vector<string> warnings;
+  if (op_tracker.check_ops_in_flight(warnings)) {
+    for (vector<string>::iterator i = warnings.begin();
+        i != warnings.end();
+        ++i) {
+      clog.warn() << *i;
+    }
+  }
+  return;
+}
 
 
 // -----------------------
@@ -1725,6 +1782,8 @@ void MDS::suicide()
 
   monc->shutdown();
 
+  op_tracker.on_shutdown();
+
   // shut down messenger
   messenger->shutdown();
 }
index 2649f449d09fded1bc7678ebf7a3c5ab134c8853..1e11d9b5de6b1f3cdaa60e69e42453ab471e7dd5 100644 (file)
@@ -29,6 +29,7 @@
 #include "common/Cond.h"
 #include "common/Timer.h"
 #include "common/LogClient.h"
+#include "common/TrackedOp.h"
 
 #include "MDSMap.h"
 
@@ -142,7 +143,7 @@ class MDSTableClient;
 
 class AuthAuthorizeHandlerRegistry;
 
-class MDS : public Dispatcher {
+class MDS : public Dispatcher, public md_config_obs_t {
  public:
   Mutex        mds_lock;
   SafeTimer    timer;
@@ -186,6 +187,7 @@ class MDS : public Dispatcher {
   MDSTableServer *get_table_server(int t);
 
   PerfCounters       *logger, *mlogger;
+  OpTracker    op_tracker;
 
   int orig_argc;
   const char **orig_argv;
@@ -362,11 +364,17 @@ class MDS : public Dispatcher {
   // start up, shutdown
   int init(int wanted_state=MDSMap::STATE_BOOT);
 
+  // admin socket handling
   friend class MDSSocketHook;
   class MDSSocketHook *asok_hook;
   bool asok_command(string command, cmdmap_t& cmdmap, string format,
                    ostream& ss);
   void set_up_admin_socket();
+  void check_ops_in_flight(); // send off any slow ops to monitor
+    // config observer bits
+  virtual const char** get_tracked_conf_keys() const;
+  virtual void handle_conf_change(const struct md_config_t *conf,
+                                 const std::set <std::string> &changed);
   void create_logger();
 
   void bcast_mds_map();  // to mounted clients
index 4ee35003ace0da94feac2b149921bd7100137030..bdc8fe0c2c2d7d9633ca462f24a462e10d2aa74e 100644 (file)
@@ -28,7 +28,8 @@ libmds_la_SOURCES = \
        mds/snap.cc \
        mds/SessionMap.cc \
        mds/MDLog.cc \
-       mds/MDSUtility.cc
+       mds/MDSUtility.cc \
+       common/TrackedOp.cc
 libmds_la_LIBADD = $(LIBOSDC)
 noinst_LTLIBRARIES += libmds.la
 
index ffeb066b6bd04e2a5bbb5a7ec2d854c9f8ff6764..c484de1ee3e027a9e0cd60fdef6a6680329be156 100644 (file)
@@ -288,3 +288,72 @@ void MDRequestImpl::print(ostream &out)
   out << ")";
 }
 
+void MDRequestImpl::_dump(utime_t now, Formatter *f) const
+{
+  f->dump_string("flag_point", state_string());
+  f->dump_stream("reqid") << reqid;
+  {
+    if (client_request) {
+      f->dump_string("op_type", "client_request");
+      f->open_object_section("client_info");
+      f->dump_stream("client") << client_request->get_orig_source();
+      f->dump_int("tid", client_request->get_tid());
+      f->close_section(); // client_info
+    } else if (slave_request) {
+      assert(!slave_request->is_reply()); // replies go to an existing mdr
+      f->dump_string("op_type", "slave_request");
+      f->open_object_section("master_info");
+      f->dump_stream("master") << slave_request->get_orig_source();
+      f->close_section(); // master_info
+
+      f->open_object_section("request_info");
+      f->dump_int("attempt", slave_request->get_attempt());
+      f->dump_string("op_type",
+                     slave_request->get_opname(slave_request->get_op()));
+      f->dump_int("lock_type", slave_request->get_lock_type());
+      f->dump_stream("object_info") << slave_request->get_object_info();
+      f->dump_stream("srcdnpath") << slave_request->srcdnpath;
+      f->dump_stream("destdnpath") << slave_request->destdnpath;
+      f->dump_stream("witnesses") << slave_request->witnesses;
+      f->dump_bool("has_inode_export",
+                   slave_request->inode_export.length() != 0);
+      f->dump_int("inode_export_v", slave_request->inode_export_v);
+      f->dump_bool("has_srci_replica",
+                   slave_request->srci_replica.length() != 0);
+      f->dump_stream("timestamp_now") << slave_request->now;
+      f->close_section(); // request_info
+    }
+    else { // internal request
+      assert(internal_op != -1);
+      f->dump_string("op_type", "internal_op");
+      f->dump_int("internal_op", internal_op);
+    }
+  }
+  {
+    f->open_array_section("events");
+    for (list<pair<utime_t, string> >::const_iterator i = events.begin();
+         i != events.end();
+         ++i) {
+      f->open_object_section("event");
+      f->dump_stream("time") << i->first;
+      f->dump_string("event", i->second);
+      f->close_section();
+    }
+    f->close_section(); // events
+  }
+}
+
+void MDRequestImpl::_dump_op_descriptor(ostream& stream) const
+{
+  if (client_request) {
+    client_request->print(stream);
+  } else if (slave_request) {
+    slave_request->print(stream);
+  } else if (internal_op >= 0) {
+    stream << "internal op " << ceph_mds_op_name(internal_op) << ":" << reqid;
+  } else {
+    // drat, it's triggered by a slave request, but we don't have a message
+    // FIXME
+    stream << "rejoin:" << reqid;
+  }
+}
index f4210ed011347a8446dbbe2adbf46ecca8cee26a..0712d3d9dddc1f3bc79d7613f3b86506b3c8f23d 100644 (file)
@@ -23,6 +23,8 @@
 #include "SimpleLock.h"
 #include "Capability.h"
 
+#include "common/TrackedOp.h"
+
 class LogSegment;
 class Capability;
 class CInode;
@@ -156,7 +158,7 @@ typedef ceph::shared_ptr<MutationImpl> MutationRef;
  * mostly information about locks held, so that we can drop them all
  * the request is finished or forwarded.  see request_*().
  */
-struct MDRequestImpl : public MutationImpl {
+struct MDRequestImpl : public MutationImpl, public TrackedOp {
   Session *session;
   elist<MDRequestImpl*>::item item_session_request;  // if not on list, op is aborted.
 
@@ -257,12 +259,16 @@ struct MDRequestImpl : public MutationImpl {
     MClientRequest *client_req;
     class Message *triggering_slave_req;
     int slave_to;
+    utime_t initiated;
+    utime_t throttled, all_read, dispatched;
+    int internal_op;
     // keep these default values synced to MutationImpl's
     Params() : attempt(0), client_req(NULL),
         triggering_slave_req(NULL), slave_to(-1), internal_op(-1) {}
   };
-  MDRequestImpl(const Params& params) :
+  MDRequestImpl(const Params& params, OpTracker *tracker) :
     MutationImpl(params.reqid, params.attempt, params.slave_to),
+    TrackedOp(tracker, params.initiated),
     session(NULL), item_session_request(this),
     client_request(params.client_req), straydn(NULL), snapid(CEPH_NOSNAP),
     tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0), snap_caps(0),
@@ -270,6 +276,12 @@ struct MDRequestImpl : public MutationImpl {
     slave_request(NULL), internal_op(params.internal_op), retry(0),
     waited_for_osdmap(false), _more(NULL) {
     in[0] = in[1] = NULL;
+    if (!params.throttled.is_zero())
+      tracker->_mark_event(this, "throttled", params.throttled);
+    if (!params.all_read.is_zero())
+      tracker->_mark_event(this, "all_read", params.all_read);
+    if (!params.dispatched.is_zero())
+      tracker->_mark_event(this, "dispatched", params.dispatched);
   }
   ~MDRequestImpl();
   
@@ -287,6 +299,12 @@ struct MDRequestImpl : public MutationImpl {
   void clear_ambiguous_auth();
 
   void print(ostream &out);
+
+  // TrackedOp stuff
+  typedef ceph::shared_ptr<MDRequestImpl> Ref;
+protected:
+  void _dump(utime_t now, Formatter *f) const;
+  void _dump_op_descriptor(ostream& stream) const;
 };
 
 typedef ceph::shared_ptr<MDRequestImpl> MDRequestRef;