From: Kotresh HR Date: Wed, 7 Oct 2020 11:33:48 +0000 (+0530) Subject: mds: throttle cap acquisition via readdir X-Git-Tag: v16.1.0~664^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c0de657d3f99f8a3a0d89576dff2f8e98f5f8974;p=ceph.git mds: throttle cap acquisition via readdir A trivial "find" command on a large directory hierarchy will cause the client to receive caps significantly faster than it will release them. The MDS will try to have the client reduce its caps below the mds_max_caps_per_client limit, but the recall throttles prevent it from catching up to the pace of acquisition. The solution is to throttle readdir requests from the client, which is what this patch does. The readdir is throttled on the condition that the number of caps acquired is greater than a certain percentage of mds_max_caps_per_client (default is 10%) and cap acquisition via readdir is a certain percentage of mds_max_caps_per_client (the default is 50%). When the above condition is met, the readdir request is retried after 'mds_cap_acquisition_throttle_retry_request_timeout' (default is 0.5) seconds. 
Fixes: https://tracker.ceph.com/issues/47307 Signed-off-by: Kotresh HR --- diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index 036986b54b08..db4fa8e982ac 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -260,6 +260,9 @@ class CephFSTestCase(CephTestCase): def _session_by_id(self, session_ls): return dict([(s['id'], s) for s in session_ls]) + def perf_dump(self, rank=None, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + def wait_until_evicted(self, client_id, timeout=30): def is_client_evicted(): ls = self._session_list() diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index 51c3048bd63a..9cfda425488c 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase): else: raise RuntimeError("expected no client recall warning") + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + max_caps_per_client = 500 + cap_acquisition_throttle = 250 + + self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle) + + # Create 1500 files split across 6 directories, 250 each. 
+ for i in range(1, 7): + self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir + self.mount_a.run_shell_payload("find | wc") + + # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250 + cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + def test_client_release_bug(self): """ When a client has a bug (which we will simulate) preventing it from releasing caps, diff --git a/src/common/options.cc b/src/common/options.cc index d5a5a10a0f2f..24f282bbe937 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -8010,6 +8010,24 @@ std::vector