From 0422673b6150df851a4ea1662637a77585cde52d Mon Sep 17 00:00:00 2001
From: Xiubo Li
Date: Tue, 20 Oct 2020 01:26:33 -0400
Subject: [PATCH] qa/cephfs: add session_timeout option support

When the MDS revokes the Fwbl caps, the clients need to flush their
dirty data back to the OSDs. The flush may overload the OSDs and slow
them down, so it can take more than 60 seconds to finish, and the MDS
daemons will then report WRN messages.

For the teuthology test cases, just increase the session timeout so the
flush has enough time to finish.

Fixes: https://tracker.ceph.com/issues/47565
Signed-off-by: Xiubo Li
---
 qa/cephfs/overrides/session_timeout.yaml              | 4 ++++
 .../fs/basic_workload/overrides/session_timeout.yaml  | 1 +
 qa/suites/fs/thrash/overrides/session_timeout.yaml    | 1 +
 qa/suites/fs/verify/overrides/session_timeout.yaml    | 1 +
 qa/tasks/ceph.py                                      | 7 +++++++
 qa/tasks/cephfs/filesystem.py                         | 8 ++++++++
 qa/tasks/vstart_runner.py                             | 1 +
 7 files changed, 23 insertions(+)
 create mode 100644 qa/cephfs/overrides/session_timeout.yaml
 create mode 120000 qa/suites/fs/basic_workload/overrides/session_timeout.yaml
 create mode 120000 qa/suites/fs/thrash/overrides/session_timeout.yaml
 create mode 120000 qa/suites/fs/verify/overrides/session_timeout.yaml

diff --git a/qa/cephfs/overrides/session_timeout.yaml b/qa/cephfs/overrides/session_timeout.yaml
new file mode 100644
index 0000000000000..a7a1633371f3b
--- /dev/null
+++ b/qa/cephfs/overrides/session_timeout.yaml
@@ -0,0 +1,4 @@
+overrides:
+  ceph:
+    cephfs:
+      session_timeout: 300
diff --git a/qa/suites/fs/basic_workload/overrides/session_timeout.yaml b/qa/suites/fs/basic_workload/overrides/session_timeout.yaml
new file mode 120000
index 0000000000000..fce0318c58936
--- /dev/null
+++ b/qa/suites/fs/basic_workload/overrides/session_timeout.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/thrash/overrides/session_timeout.yaml b/qa/suites/fs/thrash/overrides/session_timeout.yaml
new file mode 120000
index 0000000000000..fce0318c58936
--- /dev/null
+++ b/qa/suites/fs/thrash/overrides/session_timeout.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/verify/overrides/session_timeout.yaml b/qa/suites/fs/verify/overrides/session_timeout.yaml
new file mode 120000
index 0000000000000..fce0318c58936
--- /dev/null
+++ b/qa/suites/fs/verify/overrides/session_timeout.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file
diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
index 100cfd7646b89..54eb5e0a6f549 100644
--- a/qa/tasks/ceph.py
+++ b/qa/tasks/ceph.py
@@ -1723,6 +1723,13 @@ def task(ctx, config):
             cephfs:
               max_mds: 2
 
+    To change the mdsmap's default session_timeout (60 seconds), use::
+
+        tasks:
+        - ceph:
+            cephfs:
+              session_timeout: 300
+
     Note, this will cause the task to check the /scratch_devs file on each node
     for available devices. If no such file is found, /dev/sdb will be used.
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 89549b39973e5..4fbf5743e757f 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -571,6 +571,9 @@ class Filesystem(MDSCluster):
     def set_max_mds(self, max_mds):
         self.set_var("max_mds", "%d" % max_mds)
 
+    def set_session_timeout(self, timeout):
+        self.set_var("session_timeout", "%d" % timeout)
+
     def set_allow_standby_replay(self, yes):
         self.set_var("allow_standby_replay", yes)
 
@@ -638,6 +641,11 @@ class Filesystem(MDSCluster):
             if max_mds > 1:
                 self.set_max_mds(max_mds)
 
+            # If absent will use the default value (60 seconds)
+            session_timeout = self.fs_config.get('session_timeout', 60)
+            if session_timeout != 60:
+                self.set_session_timeout(session_timeout)
+
         self.getinfo(refresh = True)
 
     def destroy(self, reset_obj_attrs=True):
diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py
index 054c693305d93..e7d942a1096d8 100644
--- a/qa/tasks/vstart_runner.py
+++ b/qa/tasks/vstart_runner.py
@@ -1166,6 +1166,7 @@ class LocalFilesystem(Filesystem, LocalMDSCluster):
         self.metadata_overlay = False
         self.data_pool_name = None
         self.data_pools = None
+        self.fs_config = None
 
         # Hack: cheeky inspection of ceph.conf to see what MDSs exist
         self.mds_ids = set()
-- 
2.39.5
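
Besides the yaml override above, a test can also raise the timeout directly
through the helper this patch adds to Filesystem. Below is a minimal usage
sketch, not part of the patch: the test class name is made up, and the
assumption that CephFSTestCase exposes self.fs as a Filesystem instance
follows the existing qa/tasks/cephfs test conventions.

    # Hypothetical test snippet (sketch only). Assumes CephFSTestCase
    # provides `self.fs` as a Filesystem instance, as the existing tests
    # under qa/tasks/cephfs/ do.
    from tasks.cephfs.cephfs_test_case import CephFSTestCase

    class TestSlowFlush(CephFSTestCase):
        def test_flush_with_long_session_timeout(self):
            # Raise the mdsmap session_timeout from the 60s default to 300s
            # so a slow flush of dirty data to the OSDs does not trigger
            # MDS WRN messages.
            self.fs.set_session_timeout(300)

Under the hood set_session_timeout() goes through Filesystem.set_var(), which
should end up issuing the equivalent of "ceph fs set <fs_name> session_timeout
300" against the cluster.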