From: Patrick Donnelly
Date: Wed, 23 Jan 2019 14:41:55 +0000 (-0800)
Subject: mds: recall caps incrementally
X-Git-Tag: v12.2.12~63^2~7
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4b588f430ab088530e7a4fcab00d06223b1f340d;p=ceph.git

mds: recall caps incrementally

As with trimming, use DecayCounters to throttle the number of caps we
recall, both globally and per-session.

Signed-off-by: Patrick Donnelly
(cherry picked from commit ef46216d8d0b659549925481b4eff6bd7d2c43c9)

Conflicts:
	PendingReleaseNotes
	src/common/options.cc
	src/mds/Beacon.cc
	src/mds/Server.cc
	src/mds/Server.h
	src/mds/SessionMap.cc
	src/mds/SessionMap.h
---

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 88add63e244..1218bef68e7 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -16,6 +16,10 @@
   via the `ceph tell mds. cache drop` command or large reductions in the
   cache size will no longer cause service unavailability.
 
+* The CephFS MDS behavior with recalling caps has been significantly improved
+  to not attempt recalling too many caps at once, leading to instability.
+  MDS with a large cache (64GB+) should be more stable.
+
 >= 12.1.2
 ---------
 * When running 'df' on a CephFS filesystem comprising exactly one data pool,
diff --git a/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml
index 410606225f0..f0ed3366c75 100644
--- a/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml
+++ b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml
@@ -10,7 +10,6 @@ overrides:
 tasks:
 - exec:
     mon.a:
-    - "ceph tell mds.* config set mds_max_ratio_caps_per_client 1"
     - "ceph tell mds.* config set mds_min_caps_per_client 1"
 - background_exec:
     mon.a:
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index 1f1d5467079..322bd8c895e 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -47,7 +47,6 @@ class TestClientLimits(CephFSTestCase):
 
         mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
         self.assertTrue(open_files >= mds_min_caps_per_client)
-        mds_max_ratio_caps_per_client = float(self.fs.get_config("mds_max_ratio_caps_per_client"))
 
         mount_a_client_id = self.mount_a.get_global_id()
         path = "subdir/mount_a" if use_subdir else "mount_a"
@@ -84,14 +83,13 @@
 
         # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
         # which depend on the caps outstanding, cache size and overall ratio
-        recall_expected_value = int((1.0-mds_max_ratio_caps_per_client)*(open_files+2))
         def expected_caps():
             num_caps = self.get_session(mount_a_client_id)['num_caps']
             if num_caps < mds_min_caps_per_client:
                 raise RuntimeError("client caps fell below min!")
             elif num_caps == mds_min_caps_per_client:
                 return True
-            elif recall_expected_value*.95 <= num_caps <= recall_expected_value*1.05:
+            elif num_caps < cache_size:
                 return True
             else:
                 return False
diff --git a/src/common/options.cc b/src/common/options.cc
index 57468a33208..3497b06a1fa 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -6150,6 +6150,22 @@ std::vector
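
For readers who want a feel for the mechanism, below is a minimal, self-contained C++ sketch
of the idea behind this change: every cap recall charges an exponentially decaying counter, and
recall is paused while the per-session or global counter sits above a threshold. This is not the
Ceph DecayCounter API or the MDS recall code; the class, function, and parameter names here
(DecayingCounter, caps_to_recall, recall_max_caps, the thresholds, the half-life) and all the
numbers are invented for illustration.

// Minimal sketch of a decaying-counter recall throttle (illustrative only;
// not the Ceph DecayCounter API). All names and values here are invented.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>

using Clock = std::chrono::steady_clock;

class DecayingCounter {
public:
  explicit DecayingCounter(double half_life_s) : half_life(half_life_s) {}

  // Record v events (e.g. caps recalled) at time 'now'.
  void hit(Clock::time_point now, double v) {
    decay(now);
    value += v;
  }

  // Exponentially decayed total as of 'now'.
  double get(Clock::time_point now) {
    decay(now);
    return value;
  }

private:
  void decay(Clock::time_point now) {
    std::chrono::duration<double> dt = now - last;
    if (dt.count() > 0) {
      value *= std::exp2(-dt.count() / half_life);  // halve every half_life seconds
      last = now;
    }
  }

  double half_life;
  double value = 0;
  Clock::time_point last = Clock::now();
};

// How many caps may be recalled from one session right now, honouring both a
// per-session and a global decaying budget. The knobs are hypothetical, not
// actual Ceph option names.
uint64_t caps_to_recall(DecayingCounter& session_ctr, DecayingCounter& global_ctr,
                        uint64_t session_caps, uint64_t min_caps_per_client,
                        uint64_t recall_max_caps,
                        double session_threshold, double global_threshold) {
  const auto now = Clock::now();
  if (session_ctr.get(now) >= session_threshold ||
      global_ctr.get(now) >= global_threshold)
    return 0;  // recent recalls have not decayed yet; back off
  if (session_caps <= min_caps_per_client)
    return 0;  // never push a client below its minimum
  uint64_t want = std::min<uint64_t>(recall_max_caps,
                                     session_caps - min_caps_per_client);
  session_ctr.hit(now, want);  // charge both budgets for what we ask back
  global_ctr.hit(now, want);
  return want;
}

int main() {
  DecayingCounter session_ctr(5.0), global_ctr(5.0);  // 5 s half-life
  uint64_t caps = 20000;
  for (int tick = 0; tick < 3; ++tick) {
    uint64_t n = caps_to_recall(session_ctr, global_ctr, caps,
                                /*min_caps_per_client=*/100,
                                /*recall_max_caps=*/5000,
                                /*session_threshold=*/8000.0,
                                /*global_threshold=*/32000.0);
    std::cout << "tick " << tick << ": recall " << n << " caps\n";
    caps -= n;
  }
}

Under these assumptions, a burst of recalls quickly drives the counter over its threshold and the
throttle backs off until the counter decays, bounding how many caps can be recalled in a short
window instead of sizing each recall by a fixed ratio of the client's caps (the removed
mds_max_ratio_caps_per_client behaviour).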