From: Kotresh HR Date: Wed, 7 Oct 2020 11:33:48 +0000 (+0530) Subject: mds: throttle cap acquisition via readdir X-Git-Tag: v15.2.9~114^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bebabfe8e16ef94c5655403a8a8c7dd61253256f;p=ceph.git mds: throttle cap acquisition via readdir A trivial "find" command on a large directory hierarchy will cause the client to receive caps significantly faster than it will release them. The MDS will try to have the client reduce its caps below the mds_max_caps_per_client limit, but the recall throttles prevent it from catching up to the pace of acquisition. The solution is to throttle readdir requests from the client, which is what this patch does. The readdir is throttled on the condition that the number of caps acquired is greater than a certain percentage of mds_max_caps_per_client (default is 10%) and cap acquisition via readdir is a certain percentage of mds_max_caps_per_client (the default is 50%). When the above condition is met, the readdir request is retried after 'mds_cap_acquisition_throttle_retry_request_timeout' (default is 0.5) seconds. 
Fixes: https://tracker.ceph.com/issues/47307 Signed-off-by: Kotresh HR (cherry picked from commit c0de657d3f99f8a3a0d89576dff2f8e98f5f8974) Conflicts: src/mds/MDSRank.cc: mds_heartbeat_grace config is not tracked by MDSRankDispatcher in octopus src/mds/Server.h: Per session client metrics is not available in octopus --- diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index 42d78f8caef..3b32635880c 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -227,6 +227,9 @@ class CephFSTestCase(CephTestCase): def _session_by_id(self, session_ls): return dict([(s['id'], s) for s in session_ls]) + def perf_dump(self, rank=None, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + def wait_until_evicted(self, client_id, timeout=30): def is_client_evicted(): ls = self._session_list() diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index 51c3048bd63..9cfda425488 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase): else: raise RuntimeError("expected no client recall warning") + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + max_caps_per_client = 500 + cap_acquisition_throttle = 250 + + self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle) + + # Create 1500 files split across 6 directories, 250 each. 
+ for i in range(1, 7): + self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir + self.mount_a.run_shell_payload("find | wc") + + # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250 + cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + def test_client_release_bug(self): """ When a client has a bug (which we will simulate) preventing it from releasing caps, diff --git a/src/common/options.cc b/src/common/options.cc index e79be858e95..2b8899fe4ac 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -7802,6 +7802,24 @@ std::vector