]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: release free heap pages after trim 31793/head
author Patrick Donnelly <pdonnell@redhat.com>
Thu, 21 Nov 2019 18:09:39 +0000 (10:09 -0800)
committer Patrick Donnelly <pdonnell@redhat.com>
Thu, 21 Nov 2019 18:54:34 +0000 (10:54 -0800)
MDS free heap space can grow too large for some workloads (like smallfile
and recursive deletes). This can cause the MDS mapped memory to grow
well beyond memory targets.

When we finally use the PriorityCache in the MDS, this will not be
necessary anymore as the PriorityCache already does this.

Fixes: https://tracker.ceph.com/issues/42938
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/common/options.cc
src/mds/MDCache.cc
src/mds/MDCache.h

index c5cad0169cfa71d2484c37d7890e27b3625212a6..7f65b8bd048d693546bcf8128615344b7ca18e29 100644 (file)
@@ -7455,7 +7455,13 @@ std::vector<Option> get_mds_options() {
 
     Option("mds_cache_trim_interval", Option::TYPE_SECS, Option::LEVEL_ADVANCED)
     .set_default(1)
-    .set_description("interval in seconds between cache trimming"),
+    .set_description("interval in seconds between cache trimming")
+    .set_flag(Option::FLAG_RUNTIME),
+
+    Option("mds_cache_release_free_interval", Option::TYPE_SECS, Option::LEVEL_DEV)
+    .set_default(10)
+    .set_description("interval in seconds between heap releases")
+    .set_flag(Option::FLAG_RUNTIME),
 
     Option("mds_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
     .set_default(0)
index de35bd4444760a9c7982670438f4b8867e4748cf..2a9248ab558444a79c9c51563d179d913e8421f0 100644 (file)
@@ -163,8 +163,8 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
     while (!upkeep_trim_shutdown.load()) {
       auto now = clock::now();
       auto since = now-upkeep_last_trim;
-      auto interval = clock::duration(g_conf().get_val<std::chrono::seconds>("mds_cache_trim_interval"));
-      if (since >= interval*.90) {
+      auto trim_interval = clock::duration(g_conf().get_val<std::chrono::seconds>("mds_cache_trim_interval"));
+      if (since >= trim_interval*.90) {
         lock.unlock(); /* mds_lock -> upkeep_mutex */
         std::scoped_lock mds_lock(mds->mds_lock);
         lock.lock();
@@ -177,13 +177,24 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
           check_memory_usage();
           auto flags = Server::RecallFlags::ENFORCE_MAX|Server::RecallFlags::ENFORCE_LIVENESS;
           mds->server->recall_client_state(nullptr, flags);
-          upkeep_last_trim = clock::now();
+          upkeep_last_trim = now = clock::now();
         } else {
           dout(10) << "cache not ready for trimming" << dendl;
         }
       } else {
-        interval -= since;
+        trim_interval -= since;
+      }
+      since = now-upkeep_last_release;
+      auto release_interval = clock::duration(g_conf().get_val<std::chrono::seconds>("mds_cache_release_free_interval"));
+      if (since >= release_interval) {
+        /* XXX not necessary once MDCache uses PriorityCache */
+        dout(10) << "releasing free memory" << dendl;
+        ceph_heap_release_free_memory();
+        upkeep_last_release = clock::now();
+      } else {
+        release_interval -= since;
       }
+      auto interval = std::min(release_interval, trim_interval);
       dout(20) << "upkeep thread waiting interval " << interval << dendl;
       upkeep_cvar.wait_for(lock, interval);
     }
index abed6a6d26e3c01ef1dd83b44e06efe85b790554..d79cb8912f7a5cc451bb8ee908de77a0651690f7 100644 (file)
@@ -1286,6 +1286,7 @@ class MDCache {
   ceph::mutex upkeep_mutex = ceph::make_mutex("MDCache::upkeep_mutex");
   ceph::condition_variable upkeep_cvar;
   time upkeep_last_trim = time::min();
+  time upkeep_last_release = time::min();
   std::atomic<bool> upkeep_trim_shutdown{false};
 };