From c0de657d3f99f8a3a0d89576dff2f8e98f5f8974 Mon Sep 17 00:00:00 2001
From: Kotresh HR
Date: Wed, 7 Oct 2020 17:03:48 +0530
Subject: [PATCH] mds: throttle cap acquisition via readdir

A trivial "find" command on a large directory hierarchy will cause the
client to receive caps significantly faster than it will release them.
The MDS will try to have the client reduce its caps below the
mds_max_caps_per_client limit, but the recall throttles prevent it from
catching up to the pace of acquisition.

The solution is to throttle readdir from the client, which is what this
patch implements. Readdir is throttled when the number of caps acquired
by the session exceeds a certain percentage of mds_max_caps_per_client
(default: 10%) and cap acquisition via readdir exceeds a certain
percentage of mds_max_caps_per_client (default: 50%). When this
condition is met, the readdir request is retried after
'mds_cap_acquisition_throttle_retry_request_timeout' seconds
(default: 0.5).

Fixes: https://tracker.ceph.com/issues/47307
Signed-off-by: Kotresh HR
---
 qa/tasks/cephfs/cephfs_test_case.py   |  3 +++
 qa/tasks/cephfs/test_client_limits.py | 30 +++++++++++++++++++++++
 src/common/options.cc                 | 18 ++++++++++++++
 src/mds/MDSRank.cc                    |  5 ++++
 src/mds/Server.cc                     | 35 +++++++++++++++++++++++++++
 src/mds/Server.h                      |  7 ++++++
 src/mds/SessionMap.cc                 |  8 ++++++
 src/mds/SessionMap.h                  | 11 +++++++++
 8 files changed, 117 insertions(+)

diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
index 036986b54b0..db4fa8e982a 100644
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -260,6 +260,9 @@ class CephFSTestCase(CephTestCase):
     def _session_by_id(self, session_ls):
         return dict([(s['id'], s) for s in session_ls])
 
+    def perf_dump(self, rank=None, status=None):
+        return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
+
     def wait_until_evicted(self, client_id, timeout=30):
         def is_client_evicted():
             ls = self._session_list()
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index 51c3048bd63..9cfda425488 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase):
         else:
             raise RuntimeError("expected no client recall warning")
 
+    def test_cap_acquisition_throttle_readdir(self):
+        """
+        Readdir typically acquires caps faster than the MDS can recall them, so
+        cap acquisition via readdir is throttled: when the throttle condition is
+        met, the readdir is retried after a fraction of a second (0.5 by default).
+        """
+
+        max_caps_per_client = 500
+        cap_acquisition_throttle = 250
+
+        self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
+        self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
+
+        # Create 1500 files split across 6 directories, 250 files each.
+        for i in range(1, 7):
+            self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
+
+        mount_a_client_id = self.mount_a.get_global_id()
+
+        # recursive readdir
+        self.mount_a.run_shell_payload("find | wc")
+
+        # validate that the cap_acquisition decay counter exceeds the throttle (250) after readdir
+        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+        self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
+
+        # validate that the throttle condition is hit at least once
+        cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
+        self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
+
     def test_client_release_bug(self):
         """
         When a client has a bug (which we will simulate) preventing it from releasing caps,
diff --git a/src/common/options.cc b/src/common/options.cc
index d5a5a10a0f2..24f282bbe93 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -8010,6 +8010,24 @@ std::vector
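
The throttle decision described in the commit message reduces to a two-part check plus a deferred retry. The Python sketch below restates that logic for illustration only; it is not the C++ added to src/mds/Server.cc (whose hunks are truncated above). The function and parameter names and the 10% ratio knob are assumptions made for this sketch; only mds_max_caps_per_client, mds_session_cap_acquisition_throttle and mds_cap_acquisition_throttle_retry_request_timeout are option names taken from the patch.

    # Illustrative sketch of the throttle condition, not Ceph code.
    # Assumed names: readdir_should_be_throttled, caps_ratio.
    def readdir_should_be_throttled(session_num_caps,
                                    session_cap_acquisition,
                                    mds_max_caps_per_client,
                                    caps_ratio,
                                    cap_acquisition_throttle):
        """Return True when a readdir should be deferred and retried later."""
        # The session already holds more caps than a percentage of the
        # per-client limit (the "certain percentage, default 10%" above)...
        too_many_caps = session_num_caps > caps_ratio * mds_max_caps_per_client
        # ...and its cap-acquisition decay counter (caps gained via readdir)
        # is above mds_session_cap_acquisition_throttle.
        acquiring_too_fast = session_cap_acquisition > cap_acquisition_throttle
        return too_many_caps and acquiring_too_fast

    # With the test's settings (mds_max_caps_per_client=500, throttle=250),
    # a session holding 400 caps that recently acquired 300 via readdir would
    # be throttled; the MDS would retry the readdir after
    # mds_cap_acquisition_throttle_retry_request_timeout seconds (0.5 by default).
    assert readdir_should_be_throttled(400, 300, 500, 0.10, 250)
    assert not readdir_should_be_throttled(30, 300, 500, 0.10, 250)  # few caps held

Because 'cap_acquisition' is a decay counter (as the test comment notes), the condition clears on its own once the readdir burst slows, so a throttled request only needs to be re-queued after the retry timeout rather than rejected.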