From ec8a40b08f1a4165aadf5701031500476fb76496 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Wed, 21 Jul 2021 18:45:37 +0800 Subject: [PATCH] qa/tasks/mgr: clean crash reports before waiting for clean otherwise we have the following warning in the health report: {"status":"HEALTH_WARN","checks":{"RECENT_MGR_MODULE_CRASH":{"severity":"HEALTH_WARN","summary":{"message":"1 mgr modules have recently crashed","count":1},"muted":false}},"mutes":[]} It does not disappear after the test waits for 30 seconds, and the tasks.mgr.test_module_selftest.TestModuleSelftest test fails like: 2021-07-21T09:59:52.560 INFO:tasks.cephfs_test_runner:====================================================================== 2021-07-21T09:59:52.561 INFO:tasks.cephfs_test_runner:ERROR: test_module_commands (tasks.mgr.test_module_selftest.TestModuleSelftest) 2021-07-21T09:59:52.561 INFO:tasks.cephfs_test_runner:---------------------------------------------------------------------- 2021-07-21T09:59:52.561 INFO:tasks.cephfs_test_runner:Traceback (most recent call last): 2021-07-21T09:59:52.562 INFO:tasks.cephfs_test_runner: File "/home/teuthworker/src/git.ceph.com_ceph-c_6a5d5abc027f706687dec92f92ff6fc6f074d2ae/qa/tasks/mgr/test_module_selftest.py", line 201, in test_module_commands 2021-07-21T09:59:52.562 INFO:tasks.cephfs_test_runner: self.wait_for_health_clear(timeout=30) 2021-07-21T09:59:52.562 INFO:tasks.cephfs_test_runner: File "/home/teuthworker/src/git.ceph.com_ceph-c_6a5d5abc027f706687dec92f92ff6fc6f074d2ae/qa/tasks/ceph_test_case.py", line 172, in wait_for_health_clear 2021-07-21T09:59:52.563 INFO:tasks.cephfs_test_runner: self.wait_until_true(is_clear, timeout) 2021-07-21T09:59:52.563 INFO:tasks.cephfs_test_runner: File "/home/teuthworker/src/git.ceph.com_ceph-c_6a5d5abc027f706687dec92f92ff6fc6f074d2ae/qa/tasks/ceph_test_case.py", line 209, in wait_until_true 2021-07-21T09:59:52.563 INFO:tasks.cephfs_test_runner: raise TestTimeoutError("Timed out after {0}s and {1} retries".format(elapsed, 
retry_count)) 2021-07-21T09:59:52.564 INFO:tasks.cephfs_test_runner:tasks.ceph_test_case.TestTimeoutError: Timed out after 30s and 0 retries In this change, the crash reports are nuked right after we see the warning, so that we can have a clean health report. Fixes: https://tracker.ceph.com/issues/51743 Signed-off-by: Kefu Chai --- qa/tasks/mgr/test_module_selftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py index 523a59f38bd..0f64adbd885 100644 --- a/qa/tasks/mgr/test_module_selftest.py +++ b/qa/tasks/mgr/test_module_selftest.py @@ -194,7 +194,10 @@ class TestModuleSelftest(MgrTestCase): self.wait_for_health( "Module 'selftest' has failed: Synthetic exception in serve", timeout=30) - + # prune the crash reports, so that the health report is back to + # clean + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "crash", "prune", "0") self.mgr_cluster.mon_manager.raw_cluster_cmd( "mgr", "module", "disable", "selftest") -- 2.39.5