mds: fix false "failing to respond to cache pressure" warning

author Yan, Zheng <zyan@redhat.com>
Sat, 8 Oct 2016 07:16:40 +0000 (15:16 +0800)

committer Loic Dachary <ldachary@redhat.com>
Wed, 9 Nov 2016 14:13:42 +0000 (15:13 +0100)
diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc

index 06020af105325e6e5e9305cc20f72e756f42eac6..b2565fd66227a2add39b2aadb88609b5cbfb34cb 100644 (file)
--- a/src/mds/Beacon.cc
+++ b/src/mds/Beacon.cc
@@ -384,8 +384,10 @@ void Beacon::notify_health(MDSRank const *mds)
    {
      set<Session*> sessions;
      mds->sessionmap.get_client_session_set(sessions);
+
      utime_t cutoff = ceph_clock_now(g_ceph_context);
      cutoff -= g_conf->mds_recall_state_timeout;
+    utime_t last_recall = mds->mdcache->last_recall_state;
  
      std::list<MDSHealthMetric> late_recall_metrics;
      std::list<MDSHealthMetric> large_completed_requests_metrics;
@@ -395,7 +397,10 @@ void Beacon::notify_health(MDSRank const *mds)
          dout(20) << "Session servicing RECALL " << session->info.inst
            << ": " << session->recalled_at << " " << session->recall_release_count
            << "/" << session->recall_count << dendl;
-        if (session->recalled_at < cutoff) {
+       if (last_recall < cutoff || session->last_recall_sent < last_recall) {
+         dout(20) << "  no longer recall" << dendl;
+         session->clear_recalled_at();
+       } else if (session->recalled_at < cutoff) {
            dout(20) << "  exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
            std::ostringstream oss;
           oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc

index 32269695e222f25ced8405b3361142b9657ca255..447d2795df89c2f54a90f7e95094b38522aeec1f 100644 (file)
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -7316,10 +7316,11 @@ void MDCache::check_memory_usage()
  
    if (num_inodes_with_caps > g_conf->mds_cache_size) {
      float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
-    if (ratio < 1.0)
+    if (ratio < 1.0) {
+      last_recall_state = ceph_clock_now(g_ceph_context);
        mds->server->recall_client_state(ratio);
+    }
    }
-
  }
  
  
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h

index 9152c90698544583f802a2b67dded131b34e4bcb..7a221108787a59bc38407bd6c53b38314b996182 100644 (file)
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -693,6 +693,8 @@ public:
    void trim_client_leases();
    void check_memory_usage();
  
+  utime_t last_recall_state;
+
    // shutdown
    void shutdown_start();
    void shutdown_check();
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc

index d91713f06f96a306378840836a60edc1f9da5a36..247038a46f8911ec29880b9fb9c4860b1255a2eb 100644 (file)
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -771,11 +771,8 @@ void Session::notify_cap_release(size_t n_caps)
  {
    if (!recalled_at.is_zero()) {
      recall_release_count += n_caps;
-    if (recall_release_count >= recall_count) {
-      recalled_at = utime_t();
-      recall_count = 0;
-      recall_release_count = 0;
-    }
+    if (recall_release_count >= recall_count)
+      clear_recalled_at();
    }
  }
  
@@ -790,13 +787,22 @@ void Session::notify_recall_sent(int const new_limit)
    if (recalled_at.is_zero()) {
      // Entering recall phase, set up counters so we can later
      // judge whether the client has respected the recall request
-    recalled_at = ceph_clock_now(g_ceph_context);
+    recalled_at = last_recall_sent = ceph_clock_now(g_ceph_context);
      assert (new_limit < caps.size());  // Behaviour of Server::recall_client_state
      recall_count = caps.size() - new_limit;
      recall_release_count = 0;
+  } else {
+    last_recall_sent = ceph_clock_now(g_ceph_context);
    }
  }
  
+void Session::clear_recalled_at()
+{
+  recalled_at = last_recall_sent = utime_t();
+  recall_count = 0;
+  recall_release_count = 0;
+}
+
  void Session::set_client_metadata(map<string, string> const &meta)
  {
    info.client_metadata = meta;
diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h

index d03e16c85ac25931bc8bcf174d3a312040fba11f..6ddb603c5b75ee6beab241ac0803f9aaeffa8789 100644 (file)
--- a/src/mds/SessionMap.h
+++ b/src/mds/SessionMap.h
@@ -123,6 +123,7 @@ public:
  
    // Ephemeral state for tracking progress of capability recalls
    utime_t recalled_at;  // When was I asked to SESSION_RECALL?
+  utime_t last_recall_sent;
    uint32_t recall_count;  // How many caps was I asked to SESSION_RECALL?
    uint32_t recall_release_count;  // How many caps have I actually revoked?
  
@@ -142,6 +143,7 @@ public:
  
    void notify_cap_release(size_t n_caps);
    void notify_recall_sent(int const new_limit);
+  void clear_recalled_at();
  
    inodeno_t next_ino() {
      if (info.prealloc_inos.empty())
@@ -309,7 +311,7 @@ public:
  
    Session() : 
      state(STATE_CLOSED), state_seq(0), importing_count(0),
-    recalled_at(), recall_count(0), recall_release_count(0),
+    recall_count(0), recall_release_count(0),
      auth_caps(g_ceph_context),
      connection(NULL), item_session_list(this),
      requests(0),  // member_offset passed to front() manually
author	Yan, Zheng <zyan@redhat.com>
	Sat, 8 Oct 2016 07:16:40 +0000 (15:16 +0800)
committer	Loic Dachary <ldachary@redhat.com>
	Wed, 9 Nov 2016 14:13:42 +0000 (15:13 +0100)
src/mds/Beacon.cc		patch | blob | history
src/mds/MDCache.cc		patch | blob | history
src/mds/MDCache.h		patch | blob | history
src/mds/SessionMap.cc		patch | blob | history
src/mds/SessionMap.h		patch | blob | history