From: Kotresh HR
Date: Fri, 4 Nov 2022 10:36:20 +0000 (+0530)
Subject: qa: Add test for per-module finisher thread
X-Git-Tag: v16.2.15~194^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F51045%2Fhead;p=ceph.git

qa: Add test for per-module finisher thread

Fixes: https://tracker.ceph.com/issues/51177
Signed-off-by: Kotresh HR
(cherry picked from commit 2c2ef6d56b583a7a27fd7e99359cdabb2fd37fb5)

Conflicts:
	Removed 'telemetry diff' command used for testing as it's not
	present in pacific.
---

diff --git a/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml
new file mode 100644
index 0000000000000..ec8335fe0cfb3
--- /dev/null
+++ b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml
@@ -0,0 +1,13 @@
+tasks:
+  - check-counter:
+      counters:
+        mgr:
+          - name: "finisher-volumes.complete_latency.avgcount"
+            min: 4
+          - name: "finisher-volumes.queue_len"
+            expected_val: 0
+
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_volumes.TestPerModuleFinsherThread
diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml
new file mode 100644
index 0000000000000..a2f087ab19bd7
--- /dev/null
+++ b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml
@@ -0,0 +1,43 @@
+tasks:
+  - install:
+  - ceph:
+      wait-for-scrub: false
+  - check-counter:
+      counters:
+        mgr:
+          - name: "finisher-balancer.complete_latency.avgcount"
+            min: 1
+          - name: "finisher-balancer.queue_len"
+            expected_val: 0
+          - name: "finisher-crash.complete_latency.avgcount"
+            min: 2
+          - name: "finisher-crash.queue_len"
+            expected_val: 0
+          - name: "finisher-devicehealth.complete_latency.avgcount"
+            min: 1
+          - name: "finisher-devicehealth.queue_len"
+            expected_val: 0
+          - name: "finisher-iostat.complete_latency.avgcount"
+            min: 1
+          - name: "finisher-iostat.queue_len"
+            expected_val: 0
+          - name: "finisher-pg_autoscaler.complete_latency.avgcount"
+            min: 1
+          - name: "finisher-pg_autoscaler.queue_len"
+            expected_val: 0
+          - name: "finisher-progress.complete_latency.avgcount"
+            min: 2
+          - name: "finisher-progress.queue_len"
+            expected_val: 0
+          - name: "finisher-status.complete_latency.avgcount"
+            min: 2
+          - name: "finisher-status.queue_len"
+            expected_val: 0
+          - name: "finisher-telemetry.complete_latency.avgcount"
+            min: 1
+          - name: "finisher-telemetry.queue_len"
+            expected_val: 0
+  - workunit:
+      clients:
+        client.0:
+          - mgr/test_per_module_finisher.sh
diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml
index 6074de0edf360..791adc27298b9 100644
--- a/qa/suites/rados/mgr/tasks/workunits.yaml
+++ b/qa/suites/rados/mgr/tasks/workunits.yaml
@@ -13,4 +13,4 @@ tasks:
   - workunit:
       clients:
         client.0:
-          - mgr
\ No newline at end of file
+          - mgr/test_localpool.sh
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
index 7f17fe9e485b2..11fc238f5528a 100644
--- a/qa/tasks/cephfs/test_volumes.py
+++ b/qa/tasks/cephfs/test_volumes.py
@@ -7805,3 +7805,29 @@ class TestMisc(TestVolumesHelper):
 
         # remove group
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+class TestPerModuleFinsherThread(TestVolumesHelper):
+    """
+    Per module finisher thread tests related to mgr/volume cmds.
+    This is used in conjunction with check_counter with min val being 4
+    as four subvolume cmds are run
+    """
+    def test_volumes_module_finisher_thread(self):
+        subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+        group = self._generate_random_group_name()
+
+        # create group
+        self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+        # create subvolumes in group
+        self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group)
+
+        self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+        # verify trash dir is clean
+        self._wait_for_trash_empty()
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
index 6219cb0d8e386..40818f3f475d2 100644
--- a/qa/tasks/check_counter.py
+++ b/qa/tasks/check_counter.py
@@ -5,6 +5,8 @@
 import json
 
 from teuthology.task import Task
 from teuthology import misc
 
+from tasks import ceph_manager
+
 log = logging.getLogger(__name__)
@@ -35,6 +37,11 @@ class CheckCounter(Task):
               min: 3
         - workunit: ...
    """
+    @property
+    def admin_remote(self):
+        first_mon = misc.get_first_mon(self.ctx, None)
+        (result,) = self.ctx.cluster.only(first_mon).remotes.keys()
+        return result
 
    def start(self):
        log.info("START")
@@ -50,6 +57,10 @@ class CheckCounter(Task):
 
        if cluster_name is None:
            cluster_name = next(iter(self.ctx.managers.keys()))
+
+        mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager'))
+        active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
+
        for daemon_type, counters in targets.items():
            # List of 'a', 'b', 'c'...
            daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
@@ -64,6 +75,8 @@ class CheckCounter(Task):
                if not daemon.running():
                    log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
                    continue
+                elif daemon_type == 'mgr' and daemon_id != active_mgr:
+                    continue
                else:
                    log.debug("Getting stats from {0}".format(daemon_id))
 
@@ -76,10 +89,15 @@ class CheckCounter(Task):
                    log.warning("No admin socket response from {0}, skipping".format(daemon_id))
                    continue
 
+                minval = ''
+                expected_val = ''
                for counter in counters:
                    if isinstance(counter, dict):
                        name = counter['name']
-                        minval = counter['min']
+                        if 'min' in counter:
+                            minval = counter['min']
+                        if 'expected_val' in counter:
+                            expected_val = counter['expected_val']
                    else:
                        name = counter
                        minval = 1
@@ -96,7 +114,9 @@ class CheckCounter(Task):
 
                    if val is not None:
                        log.info(f"Daemon {daemon_type}.{daemon_id} {name}={val}")
-                        if val >= minval:
+                        if isinstance(minval, int) and val >= minval:
+                            seen.add(name)
+                        elif isinstance(expected_val, int) and val == expected_val:
                            seen.add(name)
 
                if not dry_run:
diff --git a/qa/workunits/mgr/test_per_module_finisher.sh b/qa/workunits/mgr/test_per_module_finisher.sh
new file mode 100755
index 0000000000000..09937bc02da9a
--- /dev/null
+++ b/qa/workunits/mgr/test_per_module_finisher.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the per module finisher stats for enabled modules
+# using check counter (qa/tasks/check_counter.py).
+
+# 'balancer' commands
+ceph balancer pool ls
+
+# 'crash' commands
+ceph crash ls
+ceph crash ls-new
+
+# 'device' commands
+ceph device query-daemon-health-metrics mon.a
+
+# 'iostat' command
+ceph iostat &
+pid=$!
+sleep 3
+kill -SIGTERM $pid
+
+# 'pg_autoscaler' command
+ceph osd pool autoscale-status
+
+# 'progress' command
+ceph progress
+ceph progress json
+
+# 'status' commands
+ceph fs status
+ceph osd status
+
+# 'telemetry' commands
+ceph telemetry status
+
+echo OK
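
For manual verification of what the check-counter suites above assert, the same per-module finisher counters can be read straight from the active mgr's perf counters. The following is a rough sketch, not part of the patch: it assumes a running cluster, that jq is installed, and that it is run on the host carrying the active mgr so its admin socket is reachable; the active mgr id is resolved with "ceph mgr dump", the same lookup the check_counter.py change uses.

    #!/usr/bin/env bash
    set -ex

    # Resolve the active mgr id, mirroring the "mgr dump" lookup in check_counter.py.
    active_mgr=$(ceph mgr dump --format=json | jq -r '.active_name')

    # Dump the mgr perf counters and keep only the per-module finisher sections,
    # e.g. "finisher-balancer", "finisher-progress", "finisher-telemetry".
    ceph daemon mgr."$active_mgr" perf dump | \
        jq 'with_entries(select(.key | startswith("finisher-")))'

Each finisher-<module> section carries the complete_latency.avgcount and queue_len values that the YAML suites compare against min and expected_val respectively.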