From 8367448765090470344b474870a689c9ca39e932 Mon Sep 17 00:00:00 2001 From: Jos Collin Date: Wed, 25 Nov 2020 15:38:08 +0530 Subject: [PATCH] qa: test DispatchQueue throttling Fixes: https://tracker.ceph.com/issues/46226 Signed-off-by: Jos Collin --- .../fs/multiclient/tasks/cephfs_misc_tests.yaml | 1 + qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml | 1 + qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml | 1 + .../fs/thrash/workloads/tasks/1-thrash/mds.yaml | 1 + .../fs/thrash/workloads/tasks/1-thrash/mon.yaml | 1 + .../fs/thrash/workloads/tasks/1-thrash/osd.yaml | 1 + qa/tasks/cephfs/test_misc.py | 12 ++++++++++++ 7 files changed, 18 insertions(+) diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml index 40d63ba792b..d9d5d3ca39d 100644 --- a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml +++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml @@ -11,3 +11,4 @@ overrides: - has not responded to cap revoke by MDS for over - MDS_CLIENT_LATE_RELEASE - responding to mclientcaps + - Throttler Limit has been hit. Some message processing may be significantly delayed. diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml index 33748cea5cd..a151d7a49cf 100644 --- a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml @@ -5,3 +5,4 @@ overrides: ceph: log-ignorelist: - Replacing daemon mds + - Throttler Limit has been hit. Some message processing may be significantly delayed. diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml index fbbe16151ce..370fd66f79f 100644 --- a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml @@ -3,6 +3,7 @@ overrides: log-ignorelist: - overall HEALTH_ - \(MON_DOWN\) + - Throttler Limit has been hit. 
Some message processing may be significantly delayed. tasks: - mon_thrash: check_mds_failover: True diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml index 33748cea5cd..a151d7a49cf 100644 --- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml @@ -5,3 +5,4 @@ overrides: ceph: log-ignorelist: - Replacing daemon mds + - Throttler Limit has been hit. Some message processing may be significantly delayed. diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml index fbbe16151ce..370fd66f79f 100644 --- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml @@ -3,6 +3,7 @@ overrides: log-ignorelist: - overall HEALTH_ - \(MON_DOWN\) + - Throttler Limit has been hit. Some message processing may be significantly delayed. tasks: - mon_thrash: check_mds_failover: True diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml index 62e30ba230f..968ac782d5b 100644 --- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml @@ -4,5 +4,6 @@ overrides: - but it is still running - objects unfound and apparently lost - MDS_SLOW_METADATA_IO + - Throttler Limit has been hit. Some message processing may be significantly delayed. 
tasks: - thrashosds: diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 6a295bbfdf1..6aa222f9a61 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -198,6 +198,18 @@ class TestMisc(CephFSTestCase): info = self.fs.mds_asok(['dump', 'inode', hex(ino)]) assert info['path'] == "/foo" + def test_dispatch_queue_throttle_message(self): + """ + That the cluster logs a warning when the Dispatch Queue Throttle Limit is hit + """ + self.config_set('mds', 'ms_dispatch_throttle_log_interval', 5) + self.config_set('mds', 'ms_dispatch_throttle_bytes', 10240) + + # Create files & split across 10 directories, 1000 each. + with self.assert_cluster_log("Throttler Limit has been hit. Some message processing may be significantly delayed.", + invert_match=False, watch_channel="cluster"): + for i in range(0, 10): + self.mount_a.create_n_files("dir{0}/file".format(i), 1000, sync=False) class TestCacheDrop(CephFSTestCase): CLIENTS_REQUIRED = 1 -- 2.39.5