git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: warn if client does not advance its oldest flush tid 4791/head
author: Yan, Zheng <zyan@redhat.com>
Sun, 7 Jun 2015 08:30:59 +0000 (16:30 +0800)
committer: Yan, Zheng <zyan@redhat.com>
Tue, 18 Aug 2015 08:49:50 +0000 (16:49 +0800)
Signed-off-by: Yan, Zheng <zyan@redhat.com>
src/common/config_opts.h
src/mds/Beacon.cc
src/mds/Locker.cc
src/mds/SessionMap.h

index b21f69a788ba79aa0e1ecd95dd28209493eb0235..7c5133a63742259d447ebafaa2dab5ff400eec06 100644 (file)
@@ -475,6 +475,7 @@ OPTION(mds_snap_max_uid, OPT_U32, 65536) // The maximum UID allowed to create a
 OPTION(mds_snap_rstat, OPT_BOOL, false) // enable/disbale nested stat for snapshot
 OPTION(mds_verify_backtrace, OPT_U32, 1)
 // detect clients which aren't trimming completed requests
+OPTION(mds_max_completed_flushes, OPT_U32, 100000)
 OPTION(mds_max_completed_requests, OPT_U32, 100000)
 
 OPTION(mds_action_on_write_error, OPT_U32, 1) // 0: ignore; 1: force readonly; 2: crash
index 1312758435f0ba3525840387dee3c4c5bfb55a09..ea34ded8a13d2a783b3e6c34ef8f9d13cca11e16 100644 (file)
@@ -396,10 +396,12 @@ void Beacon::notify_health(MDSRank const *mds)
           dout(20) << "  within timeout " << session->recalled_at << " vs. " << cutoff << dendl;
         }
       }
-      if (session->get_num_trim_requests_warnings() > 0 &&
-         session->get_num_completed_requests() >= g_conf->mds_max_completed_requests) {
+      if ((session->get_num_trim_requests_warnings() > 0 &&
+          session->get_num_completed_requests() >= g_conf->mds_max_completed_requests) ||
+         (session->get_num_trim_flushes_warnings() > 0 &&
+          session->get_num_completed_flushes() >= g_conf->mds_max_completed_flushes)) {
        std::ostringstream oss;
-       oss << "Client " << session->get_human_name() << " failing to advance its oldest_client_tid";
+       oss << "Client " << session->get_human_name() << " failing to advance its oldest client/flush tid";
        MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID, HEALTH_WARN, oss.str());
        m.metadata["client_id"] = session->info.inst.name.num();
        large_completed_requests_metrics.push_back(m);
@@ -423,7 +425,7 @@ void Beacon::notify_health(MDSRank const *mds)
     } else {
       std::ostringstream oss;
       oss << "Many clients (" << large_completed_requests_metrics.size()
-       << ") failing to advance their oldest_client_tid";
+       << ") failing to advance their oldest client/flush tid";
       MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID_MANY, HEALTH_WARN, oss.str());
       m.metadata["client_count"] = large_completed_requests_metrics.size();
       health.metrics.push_back(m);
index 3849ce464da05bb1af92847de31491ba9cba9166..fb548a56e6d633b0f54c6e965108d13efb84968a 100644 (file)
@@ -2491,6 +2491,22 @@ void Locker::handle_client_caps(MClientCaps *m)
   if (m->get_oldest_flush_tid() > 0) {
     if (session->trim_completed_flushes(m->get_oldest_flush_tid())) {
       mds->mdlog->get_current_segment()->touched_sessions.insert(session->info.inst.name);
+
+      if (session->get_num_trim_flushes_warnings() > 0 &&
+         session->get_num_completed_flushes() * 2 < g_conf->mds_max_completed_flushes)
+       session->reset_num_trim_flushes_warnings();
+    } else {
+      if (session->get_num_completed_flushes() >=
+         (g_conf->mds_max_completed_flushes << session->get_num_trim_flushes_warnings())) {
+       session->inc_num_trim_flushes_warnings();
+       stringstream ss;
+       ss << "client." << session->get_client() << " does not advance its oldest_flush_tid ("
+          << m->get_oldest_flush_tid() << "), "
+          << session->get_num_completed_flushes()
+          << " completed flushes recorded in session\n";
+       mds->clog->warn() << ss.str();
+       dout(20) << __func__ << " " << ss.str() << dendl;
+      }
     }
   }
 
index b73246e5baab34aaa937bf8cc61738a0c4453b82..cfcfa04a6fa802a9ded78f72f7ae97926c40b66c 100644 (file)
@@ -232,6 +232,7 @@ private:
   // wrote this session out?
   bool completed_requests_dirty;
 
+  unsigned num_trim_flushes_warnings;
   unsigned num_trim_requests_warnings;
 public:
   void add_completed_request(ceph_tid_t t, inodeno_t created) {
@@ -280,6 +281,11 @@ public:
     return info.completed_flushes.count(tid);
   }
 
+  unsigned get_num_completed_flushes() const { return info.completed_flushes.size(); }
+  unsigned get_num_trim_flushes_warnings() { return num_trim_flushes_warnings; }
+  void inc_num_trim_flushes_warnings() { ++num_trim_flushes_warnings; }
+  void reset_num_trim_flushes_warnings() { num_trim_flushes_warnings = 0; }
+
   unsigned get_num_completed_requests() const { return info.completed_requests.size(); }
   unsigned get_num_trim_requests_warnings() { return num_trim_requests_warnings; }
   void inc_num_trim_requests_warnings() { ++num_trim_requests_warnings; }
@@ -304,6 +310,7 @@ public:
     cap_push_seq(0),
     lease_seq(0),
     completed_requests_dirty(false),
+    num_trim_flushes_warnings(0),
     num_trim_requests_warnings(0) { }
   ~Session() {
     assert(!item_session_list.is_on_list());