From 2f48dc9a0034f28d06640fb6eb375aaf30d6b87d Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Tue, 23 Apr 2024 10:39:10 -0400 Subject: [PATCH] qa: reorganize mgr unit tests Refactor common tasks and allow loading mgrmodules before unittests start. Signed-off-by: Patrick Donnelly --- qa/suites/rados/mgr/tasks/% | 0 qa/suites/rados/mgr/tasks/1-install.yaml | 2 + .../mgr/tasks/{failover.yaml => 2-ceph.yaml} | 5 --- qa/suites/rados/mgr/tasks/3-mgrmodules.yaml | 8 ++++ qa/suites/rados/mgr/tasks/4-units/.qa | 1 + qa/suites/rados/mgr/tasks/4-units/crash.yaml | 9 ++++ .../rados/mgr/tasks/4-units/failover.yaml | 4 ++ .../rados/mgr/tasks/4-units/insights.yaml | 14 ++++++ .../mgr/tasks/4-units/module_selftest.yaml | 18 ++++++++ .../4-units/per_module_finisher_stats.yaml | 41 +++++++++++++++++ .../rados/mgr/tasks/4-units/progress.yaml | 18 ++++++++ .../rados/mgr/tasks/4-units/prometheus.yaml | 4 ++ .../rados/mgr/tasks/4-units/workunits.yaml | 5 +++ qa/suites/rados/mgr/tasks/crash.yaml | 18 -------- qa/suites/rados/mgr/tasks/insights.yaml | 20 --------- .../rados/mgr/tasks/module_selftest.yaml | 28 ------------ .../mgr/tasks/per_module_finisher_stats.yaml | 45 ------------------- qa/suites/rados/mgr/tasks/progress.yaml | 30 ------------- qa/suites/rados/mgr/tasks/prometheus.yaml | 17 ------- qa/suites/rados/mgr/tasks/workunits.yaml | 17 ------- 20 files changed, 124 insertions(+), 180 deletions(-) create mode 100644 qa/suites/rados/mgr/tasks/% create mode 100644 qa/suites/rados/mgr/tasks/1-install.yaml rename qa/suites/rados/mgr/tasks/{failover.yaml => 2-ceph.yaml} (80%) create mode 100644 qa/suites/rados/mgr/tasks/3-mgrmodules.yaml create mode 120000 qa/suites/rados/mgr/tasks/4-units/.qa create mode 100644 qa/suites/rados/mgr/tasks/4-units/crash.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/failover.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/insights.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/per_module_finisher_stats.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/progress.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/prometheus.yaml create mode 100644 qa/suites/rados/mgr/tasks/4-units/workunits.yaml delete mode 100644 qa/suites/rados/mgr/tasks/crash.yaml delete mode 100644 qa/suites/rados/mgr/tasks/insights.yaml delete mode 100644 qa/suites/rados/mgr/tasks/module_selftest.yaml delete mode 100644 qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml delete mode 100644 qa/suites/rados/mgr/tasks/progress.yaml delete mode 100644 qa/suites/rados/mgr/tasks/prometheus.yaml delete mode 100644 qa/suites/rados/mgr/tasks/workunits.yaml diff --git a/qa/suites/rados/mgr/tasks/% b/qa/suites/rados/mgr/tasks/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/rados/mgr/tasks/1-install.yaml b/qa/suites/rados/mgr/tasks/1-install.yaml new file mode 100644 index 0000000000000..6c48c5275a05c --- /dev/null +++ b/qa/suites/rados/mgr/tasks/1-install.yaml @@ -0,0 +1,2 @@ +tasks: + - install: diff --git a/qa/suites/rados/mgr/tasks/failover.yaml b/qa/suites/rados/mgr/tasks/2-ceph.yaml similarity index 80% rename from qa/suites/rados/mgr/tasks/failover.yaml rename to qa/suites/rados/mgr/tasks/2-ceph.yaml index 6d1e0d55764e3..7aa9eefc3a2bf 100644 --- a/qa/suites/rados/mgr/tasks/failover.yaml +++ b/qa/suites/rados/mgr/tasks/2-ceph.yaml @@ -1,6 +1,4 @@ - tasks: - - install: - ceph: # tests may leave mgrs broken, so don't try and call into them # to invoke e.g. pg dump during teardown. @@ -12,6 +10,3 @@ tasks: - replacing it with standby - No standby daemons available - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_failover diff --git a/qa/suites/rados/mgr/tasks/3-mgrmodules.yaml b/qa/suites/rados/mgr/tasks/3-mgrmodules.yaml new file mode 100644 index 0000000000000..c437c60ccfd16 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/3-mgrmodules.yaml @@ -0,0 +1,8 @@ +mgrmodules: + sequential: + - print: "Enabling mgr modules" + # other fragments append to this + +tasks: + - sequential: + - mgrmodules diff --git a/qa/suites/rados/mgr/tasks/4-units/.qa b/qa/suites/rados/mgr/tasks/4-units/.qa new file mode 120000 index 0000000000000..fea2489fdf6d9 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/qa/suites/rados/mgr/tasks/4-units/crash.yaml b/qa/suites/rados/mgr/tasks/4-units/crash.yaml new file mode 100644 index 0000000000000..5d2ab70ba4a7d --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/crash.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - \(RECENT_CRASH\) + +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_crash diff --git a/qa/suites/rados/mgr/tasks/4-units/failover.yaml b/qa/suites/rados/mgr/tasks/4-units/failover.yaml new file mode 100644 index 0000000000000..e54d7a17023b4 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/failover.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_failover diff --git a/qa/suites/rados/mgr/tasks/4-units/insights.yaml b/qa/suites/rados/mgr/tasks/4-units/insights.yaml new file mode 100644 index 0000000000000..bb1252e7193ea --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/insights.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - \(MGR_INSIGHTS_WARNING\) + - \(insights_health_check + - \(RECENT_CRASH\) + +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_insights diff --git a/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml new file mode 100644 index 0000000000000..1eb4a184dcac0 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - Reduced data availability + - Degraded data redundancy + - objects misplaced + - Synthetic exception in serve + - influxdb python module not found + - \(MGR_ZABBIX_ + - foo bar + - Failed to open Telegraf + - evicting unresponsive client + - 1 mgr modules have recently crashed \(RECENT_MGR_MODULE_CRASH\) +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_module_selftest + fail_on_skip: false diff --git a/qa/suites/rados/mgr/tasks/4-units/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/4-units/per_module_finisher_stats.yaml new file mode 100644 index 0000000000000..e990e628b476b --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/per_module_finisher_stats.yaml @@ -0,0 +1,41 @@ +overrides: + check-counter: + counters: + mgr: + - name: "finisher-balancer.complete_latency.avgcount" + min: 1 + - name: "finisher-balancer.queue_len" + expected_val: 0 + - name: "finisher-crash.complete_latency.avgcount" + min: 2 + - name: "finisher-crash.queue_len" + expected_val: 0 + - name: "finisher-devicehealth.complete_latency.avgcount" + min: 1 + - name: "finisher-devicehealth.queue_len" + expected_val: 0 + - name: "finisher-iostat.complete_latency.avgcount" + min: 1 + - name: "finisher-iostat.queue_len" + expected_val: 0 + - name: "finisher-pg_autoscaler.complete_latency.avgcount" + min: 1 + - name: "finisher-pg_autoscaler.queue_len" + expected_val: 0 + - name: "finisher-progress.complete_latency.avgcount" + min: 2 + - name: "finisher-progress.queue_len" + expected_val: 0 + - name: "finisher-status.complete_latency.avgcount" + min: 2 + - name: "finisher-status.queue_len" + expected_val: 0 + - name: "finisher-telemetry.complete_latency.avgcount" + min: 2 + - name: "finisher-telemetry.queue_len" + expected_val: 0 +tasks: + - workunit: + clients: + client.0: + - mgr/test_per_module_finisher.sh diff --git a/qa/suites/rados/mgr/tasks/4-units/progress.yaml b/qa/suites/rados/mgr/tasks/4-units/progress.yaml new file mode 100644 index 0000000000000..6ed4f442955f5 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/progress.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + osd: + osd mclock profile: high_recovery_ops + global: + osd pool default size : 3 + osd pool default min size : 2 + log-ignorelist: + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_WITH_FAILED_MDS\) + - \(FS_DEGRADED\) + - \(OSDMAP_FLAGS\) +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_progress diff --git a/qa/suites/rados/mgr/tasks/4-units/prometheus.yaml b/qa/suites/rados/mgr/tasks/4-units/prometheus.yaml new file mode 100644 index 0000000000000..f639e16879a31 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/prometheus.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.mgr.test_prometheus diff --git a/qa/suites/rados/mgr/tasks/4-units/workunits.yaml b/qa/suites/rados/mgr/tasks/4-units/workunits.yaml new file mode 100644 index 0000000000000..21855b14933f0 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/4-units/workunits.yaml @@ -0,0 +1,5 @@ +tasks: + - workunit: + clients: + client.0: + - mgr/test_localpool.sh diff --git a/qa/suites/rados/mgr/tasks/crash.yaml b/qa/suites/rados/mgr/tasks/crash.yaml deleted file mode 100644 index 9d2ba535efa55..0000000000000 --- a/qa/suites/rados/mgr/tasks/crash.yaml +++ /dev/null @@ -1,18 +0,0 @@ - -tasks: - - install: - - ceph: - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(PG_ - - \(RECENT_CRASH\) - - replacing it with standby - - No standby daemons available - - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_crash diff --git a/qa/suites/rados/mgr/tasks/insights.yaml b/qa/suites/rados/mgr/tasks/insights.yaml deleted file mode 100644 index f7c82cf7f2c89..0000000000000 --- a/qa/suites/rados/mgr/tasks/insights.yaml +++ /dev/null @@ -1,20 +0,0 @@ - -tasks: - - install: - - ceph: - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(MGR_INSIGHTS_WARNING\) - - \(insights_health_check - - \(PG_ - - \(RECENT_CRASH\) - - replacing it with standby - - No standby daemons available - - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_insights diff --git a/qa/suites/rados/mgr/tasks/module_selftest.yaml b/qa/suites/rados/mgr/tasks/module_selftest.yaml deleted file mode 100644 index 4403d9fffc30b..0000000000000 --- a/qa/suites/rados/mgr/tasks/module_selftest.yaml +++ /dev/null @@ -1,28 +0,0 @@ - -tasks: - - install: - - ceph: - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(PG_ - - replacing it with standby - - No standby daemons available - - Reduced data availability - - Degraded data redundancy - - objects misplaced - - Synthetic exception in serve - - influxdb python module not found - - \(MGR_ZABBIX_ - - foo bar - - Failed to open Telegraf - - evicting unresponsive client - - 1 mgr modules have recently crashed \(RECENT_MGR_MODULE_CRASH\) - - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_module_selftest - fail_on_skip: false diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml deleted file mode 100644 index de1d592df5f26..0000000000000 --- a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml +++ /dev/null @@ -1,45 +0,0 @@ -tasks: - - install: - - ceph: - wait-for-scrub: false - log-ignorelist: - - \(POOL_APP_NOT_ENABLED\) - - check-counter: - counters: - mgr: - - name: "finisher-balancer.complete_latency.avgcount" - min: 1 - - name: "finisher-balancer.queue_len" - expected_val: 0 - - name: "finisher-crash.complete_latency.avgcount" - min: 2 - - name: "finisher-crash.queue_len" - expected_val: 0 - - name: "finisher-devicehealth.complete_latency.avgcount" - min: 1 - - name: "finisher-devicehealth.queue_len" - expected_val: 0 - - name: "finisher-iostat.complete_latency.avgcount" - min: 1 - - name: "finisher-iostat.queue_len" - expected_val: 0 - - name: "finisher-pg_autoscaler.complete_latency.avgcount" - min: 1 - - name: "finisher-pg_autoscaler.queue_len" - expected_val: 0 - - name: "finisher-progress.complete_latency.avgcount" - min: 2 - - name: "finisher-progress.queue_len" - expected_val: 0 - - name: "finisher-status.complete_latency.avgcount" - min: 2 - - name: "finisher-status.queue_len" - expected_val: 0 - - name: "finisher-telemetry.complete_latency.avgcount" - min: 2 - - name: "finisher-telemetry.queue_len" - expected_val: 0 - - workunit: - clients: - client.0: - - mgr/test_per_module_finisher.sh diff --git a/qa/suites/rados/mgr/tasks/progress.yaml b/qa/suites/rados/mgr/tasks/progress.yaml deleted file mode 100644 index 183a9a29a2ef0..0000000000000 --- a/qa/suites/rados/mgr/tasks/progress.yaml +++ /dev/null @@ -1,30 +0,0 @@ -overrides: - ceph: - conf: - osd: - osd mclock profile: high_recovery_ops -tasks: - - install: - - ceph: - config: - global: - osd pool default size : 3 - osd pool default min size : 2 - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_WITH_FAILED_MDS\) - - \(FS_DEGRADED\) - - \(PG_ - - \(OSDMAP_FLAGS\) - - replacing it with standby - - No standby daemons available - - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_progress diff --git a/qa/suites/rados/mgr/tasks/prometheus.yaml b/qa/suites/rados/mgr/tasks/prometheus.yaml deleted file mode 100644 index fd0e23a355583..0000000000000 --- a/qa/suites/rados/mgr/tasks/prometheus.yaml +++ /dev/null @@ -1,17 +0,0 @@ - -tasks: - - install: - - ceph: - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(PG_ - - replacing it with standby - - No standby daemons available - - \(POOL_APP_NOT_ENABLED\) - - cephfs_test_runner: - modules: - - tasks.mgr.test_prometheus diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml deleted file mode 100644 index a48274033486f..0000000000000 --- a/qa/suites/rados/mgr/tasks/workunits.yaml +++ /dev/null @@ -1,17 +0,0 @@ -tasks: - - install: - - ceph: - # tests may leave mgrs broken, so don't try and call into them - # to invoke e.g. pg dump during teardown. - wait-for-scrub: false - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(PG_ - - replacing it with standby - - No standby daemons available - - \(POOL_APP_NOT_ENABLED\) - - workunit: - clients: - client.0: - - mgr/test_localpool.sh -- 2.39.5