From 3b5a6a4fa009f396c62155df44e395d841ab79a8 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Thu, 7 Jul 2016 14:00:58 +0100
Subject: [PATCH] mds: add health warning for oversized cache

Various issues can lead to an oversized cache, which
is a precursor to an OOM condition: let's give users
a clearer message with some useful counts that might
hint at what is wrong.

Fixes: http://tracker.ceph.com/issues/16570
Signed-off-by: John Spray
---
 src/mds/Beacon.cc         | 12 ++++++++++++
 src/mds/MDCache.h         |  1 +
 src/mds/StrayManager.h    |  2 ++
 src/messages/MMDSBeacon.h |  3 ++-
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc
index 3b13062835abd..3cd12cdad1756 100644
--- a/src/mds/Beacon.cc
+++ b/src/mds/Beacon.cc
@@ -462,6 +462,18 @@ void Beacon::notify_health(MDSRank const *mds)
                       "MDS in read-only mode");
     health.metrics.push_back(m);
   }
+
+  // Report if we have significantly exceeded our cache size limit
+  if (mds->mdcache->get_num_inodes() > g_conf->mds_cache_size * 1.5) {
+    std::ostringstream oss;
+    oss << "Too many inodes in cache (" << mds->mdcache->get_num_inodes()
+        << "/" << g_conf->mds_cache_size << "), "
+        << mds->mdcache->num_inodes_with_caps << " inodes in use by clients, "
+        << mds->mdcache->get_num_strays() << " stray files";
+
+    MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, oss.str());
+    health.metrics.push_back(m);
+  }
 }
 
 MDSMap::DaemonState Beacon::get_want_state() const
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index f8de02d2f0919..982c642ad3124 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -963,6 +963,7 @@ public:
 public:
   void eval_remote(CDentry *dn);
   void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
+  uint64_t get_num_strays() const { return stray_manager.get_num_strays(); }
 
 protected:
   void scan_stray_dir(dirfrag_t next=dirfrag_t());
diff --git a/src/mds/StrayManager.h b/src/mds/StrayManager.h
index e5c9376d16b62..264d5ff9022cc 100644
--- a/src/mds/StrayManager.h
+++ b/src/mds/StrayManager.h
@@ -170,6 +170,8 @@ class StrayManager
 
   bool eval_stray(CDentry *dn, bool delay=false);
 
+  uint64_t get_num_strays() const { return num_strays; }
+
   /**
    * Where eval_stray was previously invoked with delay=true, call
    * eval_stray again for any dentries that were put on the
diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h
index 7df54780d887a..d3840a1c49606 100644
--- a/src/messages/MMDSBeacon.h
+++ b/src/messages/MMDSBeacon.h
@@ -39,7 +39,8 @@ enum mds_metric_t {
   MDS_HEALTH_CLIENT_OLDEST_TID_MANY,
   MDS_HEALTH_DAMAGE,
   MDS_HEALTH_READ_ONLY,
-  MDS_HEALTH_SLOW_REQUEST
+  MDS_HEALTH_SLOW_REQUEST,
+  MDS_HEALTH_CACHE_OVERSIZED
 };
 
 /**
--
2.39.5