git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: add health warning for oversized cache
author: John Spray <john.spray@redhat.com>
Thu, 7 Jul 2016 13:00:58 +0000 (14:00 +0100)
committer: John Spray <john.spray@redhat.com>
Mon, 11 Jul 2016 13:11:31 +0000 (14:11 +0100)
Various issues can lead to an oversized cache, which
is a precursor to an OOM condition: let's give users
a clearer message with some useful counts that might
hint at what is wrong.

Fixes: http://tracker.ceph.com/issues/16570
Signed-off-by: John Spray <john.spray@redhat.com>
src/mds/Beacon.cc
src/mds/MDCache.h
src/mds/StrayManager.h
src/messages/MMDSBeacon.h

index 3b13062835abdf31dc6b9334cde4202fa1f2e449..3cd12cdad1756512d1c465a6ea9739df57690b47 100644 (file)
@@ -462,6 +462,18 @@ void Beacon::notify_health(MDSRank const *mds)
                       "MDS in read-only mode");
     health.metrics.push_back(m);
   }
+
+  // Report if we have significantly exceeded our cache size limit
+  if (mds->mdcache->get_num_inodes() > g_conf->mds_cache_size * 1.5) {
+    std::ostringstream oss;
+    oss << "Too many inodes in cache (" << mds->mdcache->get_num_inodes()
+        << "/" << g_conf->mds_cache_size << "), "
+        << mds->mdcache->num_inodes_with_caps << " inodes in use by clients, "
+        << mds->mdcache->get_num_strays() << " stray files";
+
+    MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, oss.str());
+    health.metrics.push_back(m);
+  }
 }
 
 MDSMap::DaemonState Beacon::get_want_state() const
index f8de02d2f0919801ba91fe1a8ddbca0d3628229d..982c642ad3124dc33c81b4dbd8513673a55d1d0f 100644 (file)
@@ -963,6 +963,7 @@ public:
 public:
   void eval_remote(CDentry *dn);
   void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
+  uint64_t get_num_strays() const { return stray_manager.get_num_strays(); }  ///< Stray count, forwarded from stray_manager.
 
 protected:
   void scan_stray_dir(dirfrag_t next=dirfrag_t());
index e5c9376d16b6236b8cb97434447adb1405ae4915..264d5ff9022cca5ceb241c5a37037770c6d93f0f 100644 (file)
@@ -170,6 +170,8 @@ class StrayManager
 
   bool eval_stray(CDentry *dn, bool delay=false);
 
+  uint64_t get_num_strays() const { return num_strays; }  ///< Returns the current value of num_strays.
+
   /**
    * Where eval_stray was previously invoked with delay=true, call
    * eval_stray again for any dentries that were put on the
index 7df54780d887aac78d6d9fc6fb7efd9e4bd4c756..d3840a1c49606c5b6ffc8ff7c7d26a7bdfec14b2 100644 (file)
@@ -39,7 +39,8 @@ enum mds_metric_t {
   MDS_HEALTH_CLIENT_OLDEST_TID_MANY,
   MDS_HEALTH_DAMAGE,
   MDS_HEALTH_READ_ONLY,
-  MDS_HEALTH_SLOW_REQUEST
+  MDS_HEALTH_SLOW_REQUEST,
+  MDS_HEALTH_CACHE_OVERSIZED
 };
 
 /**