git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: recall caps from quiescent sessions
author Patrick Donnelly <pdonnell@redhat.com>
Mon, 26 Aug 2019 21:39:30 +0000 (14:39 -0700)
committer Patrick Donnelly <pdonnell@redhat.com>
Fri, 13 Sep 2019 00:42:03 +0000 (17:42 -0700)
This introduces two new config options [1,2] that dictate when a session
is considered quiescent by the MDS. (Options are documented fully in
options.cc.) When a session is quiescent, the MDS will preemptively
recall caps to reduce the number of outstanding capabilities, which
reduces the work required during failover.

[1] mds_session_cache_liveness_magnitude
[2] mds_session_cache_liveness_decay_rate

Fixes: https://tracker.ceph.com/issues/22446
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/common/options.cc
src/mds/MDCache.cc
src/mds/MDSRank.cc
src/mds/Server.cc
src/mds/Server.h
src/mds/SessionMap.cc
src/mds/SessionMap.h

index 5ccc6142046adfc7d6031d14265fb37f2d85d49d..fb80cd739d2f13be8c4b29d1cc2263b5b6d32e6d 100644 (file)
@@ -7823,6 +7823,18 @@ std::vector<Option> get_mds_options() {
     .set_default(60.0)
     .set_description("decay rate for warning on slow session cap recall"),
 
+    Option("mds_session_cache_liveness_decay_rate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .add_see_also("mds_session_cache_liveness_magnitude")
+    .set_default(5_min)
+    .set_description("decay rate for session liveness leading to preemptive cap recall")
+    .set_long_description("This determines how long a session needs to be quiescent before the MDS begins preemptively recalling capabilities. The default of 5 minutes will cause 10 halvings of the decay counter after 1 hour, or 1/1024. The default magnitude of 10 (2^10 or 1024) is chosen so that the MDS considers a previously chatty session (approximately) to be quiescent after 1 hour."),
+
+    Option("mds_session_cache_liveness_magnitude", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
+    .add_see_also("mds_session_cache_liveness_decay_rate")
+    .set_default(10)
+    .set_description("decay magnitude for preemptively recalling caps on quiet client")
+    .set_long_description("This is the order of magnitude difference (in base 2) of the internal liveness decay counter and the number of capabilities the session holds. When this difference occurs, the MDS treats the session as quiescent and begins recalling capabilities."),
+
     Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
     .set_default(30)
     .set_description(""),
index df699f32b8d9fe3f486b050514b62fca7c8424f6..ffa811fa0a0684d40dc58f7655456c69fc279dea 100644 (file)
@@ -175,7 +175,8 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
           trim_client_leases();
           trim();
           check_memory_usage();
-          mds->server->recall_client_state(nullptr, Server::RecallFlags::ENFORCE_MAX);
+          auto flags = Server::RecallFlags::ENFORCE_MAX|Server::RecallFlags::ENFORCE_LIVENESS;
+          mds->server->recall_client_state(nullptr, flags);
           upkeep_last_trim = clock::now();
         } else {
           dout(10) << "cache not ready for trimming" << dendl;
index 4fbeff427aa2277e847d2c526c34b0aeb7b51e87..77696da76fc19afff2df38867685da12d918a687 100644 (file)
@@ -3671,6 +3671,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
     "mds_recall_max_decay_rate",
     "mds_recall_warning_decay_rate",
     "mds_request_load_average_decay_rate",
+    "mds_session_cache_liveness_decay_rate",
     NULL
   };
   return KEYS;
index d710e917091bd49ed3b9f87eaab4cfa0d7e55c0e..1f7f4c8d40ece4e66d0215b088712a981647b4e9 100644 (file)
@@ -1570,6 +1570,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
   const auto now = clock::now();
   const bool steady = !!(flags&RecallFlags::STEADY);
   const bool enforce_max = !!(flags&RecallFlags::ENFORCE_MAX);
+  const bool enforce_liveness = !!(flags&RecallFlags::ENFORCE_LIVENESS);
   const bool trim = !!(flags&RecallFlags::TRIM);
 
   const auto max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
@@ -1577,6 +1578,7 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
   const auto recall_global_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_global_max_decay_threshold");
   const auto recall_max_caps = g_conf().get_val<Option::size_t>("mds_recall_max_caps");
   const auto recall_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_max_decay_threshold");
+  const auto cache_liveness_magnitude = g_conf().get_val<Option::size_t>("mds_session_cache_liveness_magnitude");
 
   dout(7) << __func__ << ":"
            << " min=" << min_caps_per_client
@@ -1587,9 +1589,10 @@ std::pair<bool, uint64_t> Server::recall_client_state(MDSGatherBuilder* gather,
 
   /* trim caps of sessions with the most caps first */
   std::multimap<uint64_t, Session*> caps_session;
-  auto f = [&caps_session, enforce_max, trim, max_caps_per_client](auto& s) {
+  auto f = [&caps_session, enforce_max, enforce_liveness, trim, max_caps_per_client, cache_liveness_magnitude](auto& s) {
     auto num_caps = s->caps.size();
-    if (trim || (enforce_max && num_caps > max_caps_per_client)) {
+    auto cache_liveness = s->get_session_cache_liveness();
+    if (trim || (enforce_max && num_caps > max_caps_per_client) || (enforce_liveness && cache_liveness < (num_caps>>cache_liveness_magnitude))) {
       caps_session.emplace(std::piecewise_construct, std::forward_as_tuple(num_caps), std::forward_as_tuple(s));
     }
   };
index 5a3aec0b1cadfe1de09c320e6848e7310fc48d94..805ac540c2ff96e0a7532244e0beefeaf2dca4b6 100644 (file)
@@ -174,6 +174,7 @@ public:
     STEADY = (1<<0),
     ENFORCE_MAX = (1<<1),
     TRIM = (1<<2),
+    ENFORCE_LIVENESS = (1<<3),
   };
   std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE);
   void force_clients_readonly();
index a64ec838b953f855fdec1a5770c8f799620aa8f2..854d575a8ea145982f49ff8657bbaea203e3f0f7 100644 (file)
@@ -588,6 +588,7 @@ void Session::dump(Formatter *f) const
   f->dump_object("release_caps", release_caps);
   f->dump_object("recall_caps_throttle", recall_caps_throttle);
   f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o);
+  f->dump_object("session_cache_liveness", session_cache_liveness);
   info.dump(f);
 }
 
@@ -1072,6 +1073,14 @@ void SessionMap::handle_conf_change(const std::set<std::string>& changed)
     };
     apply_to_open_sessions(mut);
   }
+  if (changed.count("mds_session_cache_liveness_decay_rate")) {
+    auto d = g_conf().get_val<double>("mds_session_cache_liveness_decay_rate");
+    auto mut = [d](auto s) {
+      s->session_cache_liveness = DecayCounter(d);
+      s->session_cache_liveness.hit(s->caps.size()); /* so the MDS doesn't immediately start trimming a new session */
+    };
+    apply_to_open_sessions(mut);
+  }
 }
 
 void SessionMap::update_average_session_age() {
index cbc6dee982e11680338afec2d6c12a5076361e19..2d40d49361326608fbbb3e4402aa961b5365f308 100644 (file)
@@ -128,6 +128,9 @@ private:
   // New limit in SESSION_RECALL
   uint32_t recall_limit = 0;
 
+  // session caps liveness
+  DecayCounter session_cache_liveness;
+
   // session start time -- used to track average session time
   // note that this is initialized in the constructor rather
   // than at the time of adding a session to the sessionmap
@@ -204,6 +207,9 @@ public:
   auto get_release_caps() const {
     return release_caps.get();
   }
+  auto get_session_cache_liveness() const {
+    return session_cache_liveness.get();
+  }
 
   inodeno_t next_ino() const {
     if (info.prealloc_inos.empty())
@@ -306,14 +312,17 @@ public:
   }
 
   void touch_cap(Capability *cap) {
+    session_cache_liveness.hit(1.0);
     caps.push_front(&cap->item_session_caps);
   }
 
   void touch_cap_bottom(Capability *cap) {
+    session_cache_liveness.hit(1.0);
     caps.push_back(&cap->item_session_caps);
   }
 
   void touch_lease(ClientLease *r) {
+    session_cache_liveness.hit(1.0);
     leases.push_back(&r->item_session_lease);
   }
 
@@ -412,6 +421,7 @@ public:
     release_caps(g_conf().get_val<double>("mds_recall_warning_decay_rate")),
     recall_caps_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate")),
     recall_caps_throttle2o(0.5),
+    session_cache_liveness(g_conf().get_val<double>("mds_session_cache_liveness_decay_rate")),
     birth_time(clock::now()),
     auth_caps(g_ceph_context),
     item_session_list(this),