From: Kotresh HR Date: Wed, 7 Oct 2020 11:33:48 +0000 (+0530) Subject: mds: throttle cap acquisition via readdir X-Git-Tag: v14.2.17~58^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4c0390bce338f0e3944f7d15eaedc30dcd49f6ec;p=ceph.git mds: throttle cap acquisition via readdir A trivial "find" command on a large directory hierarchy will cause the client to receive caps significantly faster than it will release them. The MDS will try to have the client reduce its caps below the mds_max_caps_per_client limit, but the recall throttles prevent it from catching up to the pace of acquisition. The solution, which this patch implements, is to throttle readdir requests from the client. The readdir is throttled on the condition that the number of caps acquired is greater than a certain percentage of mds_max_caps_per_client (the default is 10%) and cap acquisition via readdir exceeds a certain percentage of mds_max_caps_per_client (the default is 50%). When the above condition is met, the readdir request is retried after 'mds_cap_acquisition_throttle_retry_request_timeout' (default is 0.5) seconds. Fixes: https://tracker.ceph.com/issues/47307 Signed-off-by: Kotresh HR (cherry picked from commit c0de657d3f99f8a3a0d89576dff2f8e98f5f8974) Conflicts: src/mds/MDSRank.cc: mds_heartbeat_grace config is not tracked by MDSRankDispatcher in nautilus src/mds/Server.h: Per-session client metrics are not available in nautilus src/mds/Server.cc: Per-session client metrics and the fix for inode delegation during replayed requests are not present in nautilus src/mds/SessionMap.h: Session class is not restructured in nautilus qa/tasks/cephfs/cephfs_test_case.py: Using 'rank=None' in perf_dump throws an error. 
Used 'rank=0' --- diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index 5ca8f0d5483..f901f44ba49 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -229,6 +229,9 @@ class CephFSTestCase(CephTestCase): def _session_by_id(self, session_ls): return dict([(s['id'], s) for s in session_ls]) + def perf_dump(self, rank=0, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + def wait_until_evicted(self, client_id, timeout=30): def is_client_evicted(): ls = self._session_list() diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index e32259795cb..613a405a6a3 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -150,6 +150,36 @@ class TestClientLimits(CephFSTestCase): else: raise RuntimeError("expected no client recall warning") + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + max_caps_per_client = 500 + cap_acquisition_throttle = 250 + + self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle) + + # Create 1500 files split across 6 directories, 250 each. 
+ for i in range(1, 7): + self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir + self.mount_a.run_shell_payload("find | wc") + + # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250 + cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + def test_client_release_bug(self): """ When a client has a bug (which we will simulate) preventing it from releasing caps, diff --git a/src/common/options.cc b/src/common/options.cc index a4bd01a0f18..c891c71e92d 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -7846,6 +7846,24 @@ std::vector