git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: recall caps from quiescent sessions
author Patrick Donnelly <pdonnell@redhat.com>
Mon, 26 Aug 2019 21:39:30 +0000 (14:39 -0700)
committer Nathan Cutler <ncutler@suse.com>
Wed, 16 Oct 2019 10:48:13 +0000 (12:48 +0200)
This introduces two new config options [1,2] that dictate when a session
is considered quiescent by the MDS. (Options are documented fully in
options.cc.) When a session is quiescent, the MDS will preemptively
recall caps to reduce the number of outstanding capabilities, which
reduces the work required during failover.

[1] mds_session_cache_liveness_magnitude
[2] mds_session_cache_liveness_decay_rate

Fixes: https://tracker.ceph.com/issues/22446
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit 740f6f99a1d23e281059725b6478c91e91b1c67c)

src/common/options.cc
src/mds/MDCache.cc
src/mds/MDSRank.cc
src/mds/Server.cc
src/mds/Server.h
src/mds/SessionMap.cc
src/mds/SessionMap.h

index 22691b9ddac8391cf722d44d49a0dae6a7bdb4be..43055652deda20a86f225bf0537dcff1e68f1e57 100644 (file)
@@ -7648,6 +7648,18 @@ std::vector<Option> get_mds_options() {
     .set_default(60.0)
     .set_description("decay rate for warning on slow session cap recall"),
 
+    Option("mds_session_cache_liveness_decay_rate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .add_see_also("mds_session_cache_liveness_magnitude")
+    .set_default(5_min)
+    .set_description("decay rate for session liveness leading to preemptive cap recall")
+    .set_long_description("This determines how long a session needs to be quiescent before the MDS begins preemptively recalling capabilities. The default of 5 minutes will cause 10 halvings of the decay counter after 1 hour, or 1/1024. The default magnitude of 10 (2^10 or 1024) is chosen so that the MDS considers a previously chatty session (approximately) to be quiescent after 1 hour."),
+
+    Option("mds_session_cache_liveness_magnitude", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
+    .add_see_also("mds_session_cache_liveness_decay_rate")
+    .set_default(10)
+    .set_description("decay magnitude for preemptively recalling caps on quiet client")
+    .set_long_description("This is the order of magnitude difference (in base 2) of the internal liveness decay counter and the number of capabilities the session holds. When this difference occurs, the MDS treats the session as quiescent and begins recalling capabilities."),
+
     Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
     .set_default(30)
     .set_description(""),
index 19f2c0dba6806415413394ebf7eb025f359ab8ac..a12d98dac265b460e5ffc15a3ac272abb5719a11 100644 (file)
@@ -175,7 +175,8 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
           trim_client_leases();
           trim();
           check_memory_usage();
-          mds->server->recall_client_state(nullptr, Server::RecallFlags::ENFORCE_MAX);
+          auto flags = Server::RecallFlags::ENFORCE_MAX|Server::RecallFlags::ENFORCE_LIVENESS;
+          mds->server->recall_client_state(nullptr, flags);
           upkeep_last_trim = clock::now();
         } else {
           dout(10) << "cache not ready for trimming" << dendl;
index ac1f0664ccf1751f8f51cad78a4deeb576173b92..28697dc659901e340345d2521fdb09986de8d741 100644 (file)
@@ -3616,6 +3616,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
     "mds_recall_max_decay_rate",
     "mds_recall_warning_decay_rate",
     "mds_request_load_average_decay_rate",
+    "mds_session_cache_liveness_decay_rate",
     NULL
   };
   return KEYS;
index 722c9084ea99f00c41dce74e954a3f272363f00a..6e673c55ff3106590c2c09d8d1daaf3b2fc58cb5 100644 (file)
@@ -1566,6 +1566,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
   const auto now = clock::now();
   const bool steady = !!(flags&RecallFlags::STEADY);
   const bool enforce_max = !!(flags&RecallFlags::ENFORCE_MAX);
+  const bool enforce_liveness = !!(flags&RecallFlags::ENFORCE_LIVENESS);
   const bool trim = !!(flags&RecallFlags::TRIM);
 
   const auto max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
@@ -1573,6 +1574,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
   const auto recall_global_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_global_max_decay_threshold");
   const auto recall_max_caps = g_conf().get_val<Option::size_t>("mds_recall_max_caps");
   const auto recall_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_max_decay_threshold");
+  const auto cache_liveness_magnitude = g_conf().get_val<Option::size_t>("mds_session_cache_liveness_magnitude");
 
   dout(7) << __func__ << ":"
            << " min=" << min_caps_per_client
@@ -1583,9 +1585,10 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
 
   /* trim caps of sessions with the most caps first */
   std::multimap<uint64_t, Session*> caps_session;
-  auto f = [&caps_session, enforce_max, trim, max_caps_per_client](auto& s) {
+  auto f = [&caps_session, enforce_max, enforce_liveness, trim, max_caps_per_client, cache_liveness_magnitude](auto& s) {
     auto num_caps = s->caps.size();
-    if (trim || (enforce_max && num_caps > max_caps_per_client)) {
+    auto cache_liveness = s->get_session_cache_liveness();
+    if (trim || (enforce_max && num_caps > max_caps_per_client) || (enforce_liveness && cache_liveness < (num_caps>>cache_liveness_magnitude))) {
       caps_session.emplace(std::piecewise_construct, std::forward_as_tuple(num_caps), std::forward_as_tuple(s));
     }
   };
index b88a212349729ae790264b260a56aa516158e460..49de0af681d92381be4de5cae9875bd89801e88e 100644 (file)
@@ -174,6 +174,7 @@ public:
     STEADY = (1<<0),
     ENFORCE_MAX = (1<<1),
     TRIM = (1<<2),
+    ENFORCE_LIVENESS = (1<<3),
   };
   std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE);
   void force_clients_readonly();
index 35edac2ccdd11eeb03cccb84fb31fc29ce1f6785..3259d5f807ec50a6b50771cc49a76a3ca4bf6991 100644 (file)
@@ -588,6 +588,7 @@ void Session::dump(Formatter *f) const
   f->dump_object("release_caps", release_caps);
   f->dump_object("recall_caps_throttle", recall_caps_throttle);
   f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o);
+  f->dump_object("session_cache_liveness", session_cache_liveness);
   info.dump(f);
 }
 
@@ -1072,6 +1073,14 @@ void SessionMap::handle_conf_change(const std::set<std::string>& changed)
     };
     apply_to_open_sessions(mut);
   }
+  if (changed.count("mds_session_cache_liveness_decay_rate")) {
+    auto d = g_conf().get_val<double>("mds_session_cache_liveness_decay_rate");
+    auto mut = [d](auto s) {
+      s->session_cache_liveness = DecayCounter(d);
+      s->session_cache_liveness.hit(s->caps.size()); /* so the MDS doesn't immediately start trimming a new session */
+    };
+    apply_to_open_sessions(mut);
+  }
 }
 
 void SessionMap::update_average_session_age() {
index 6dc90fb54d7e9aef3d9671fab31c05bb43ffe9cd..6f694b8c32a9ad5838357c349fafe45fb07bf2ed 100644 (file)
@@ -128,6 +128,9 @@ private:
   // New limit in SESSION_RECALL
   uint32_t recall_limit = 0;
 
+  // session caps liveness
+  DecayCounter session_cache_liveness;
+
   // session start time -- used to track average session time
   // note that this is initialized in the constructor rather
   // than at the time of adding a session to the sessionmap
@@ -204,6 +207,9 @@ public:
   auto get_release_caps() const {
     return release_caps.get();
   }
+  auto get_session_cache_liveness() const {
+    return session_cache_liveness.get();
+  }
 
   inodeno_t next_ino() const {
     if (info.prealloc_inos.empty())
@@ -306,14 +312,17 @@ public:
   }
 
   void touch_cap(Capability *cap) {
+    session_cache_liveness.hit(1.0);
     caps.push_front(&cap->item_session_caps);
   }
 
   void touch_cap_bottom(Capability *cap) {
+    session_cache_liveness.hit(1.0);
     caps.push_back(&cap->item_session_caps);
   }
 
   void touch_lease(ClientLease *r) {
+    session_cache_liveness.hit(1.0);
     leases.push_back(&r->item_session_lease);
   }
 
@@ -412,6 +421,7 @@ public:
     release_caps(g_conf().get_val<double>("mds_recall_warning_decay_rate")),
     recall_caps_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate")),
     recall_caps_throttle2o(0.5),
+    session_cache_liveness(g_conf().get_val<double>("mds_session_cache_liveness_decay_rate")),
     birth_time(clock::now()),
     auth_caps(g_ceph_context),
     item_session_list(this),