qa: Add test for per-module finisher thread (51045/head)
author     Kotresh HR <khiremat@redhat.com>
           Fri, 4 Nov 2022 10:36:20 +0000 (16:06 +0530)
committer  Kotresh HR <khiremat@redhat.com>
           Thu, 31 Aug 2023 06:17:54 +0000 (11:47 +0530)
Fixes: https://tracker.ceph.com/issues/51177
Signed-off-by: Kotresh HR <khiremat@redhat.com>
(cherry picked from commit 2c2ef6d56b583a7a27fd7e99359cdabb2fd37fb5)

Conflicts:
  Removed the 'telemetry diff' command used for testing,
  as it is not present in pacific.
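
For context, each enabled mgr module has its own finisher thread, and the per-module perf counters it exposes (for example finisher-volumes.complete_latency.avgcount and finisher-volumes.queue_len) are what the new check-counter configs below assert on. A minimal sketch of inspecting these counters by hand, not part of this commit, assuming the ceph CLI is available and the script runs on the host that holds the active mgr's admin socket:

#!/usr/bin/env python3
# Minimal sketch (not part of this commit): list the per-module finisher
# counters reported by the active mgr via its admin socket.
import json
import subprocess

def ceph_json(*args):
    """Run a ceph command and parse its JSON output."""
    return json.loads(subprocess.check_output(("ceph",) + args))

# Same active-mgr lookup the check_counter.py change below performs via CephManager.
active = ceph_json("mgr", "dump", "--format=json")["active_name"]
perf = ceph_json("daemon", f"mgr.{active}", "perf", "dump")
for name, stats in perf.items():
    if name.startswith("finisher-"):
        print(name,
              "avgcount:", stats["complete_latency"]["avgcount"],
              "queue_len:", stats["queue_len"])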

qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml [new file with mode: 0644]
qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml [new file with mode: 0644]
qa/suites/rados/mgr/tasks/workunits.yaml
qa/tasks/cephfs/test_volumes.py
qa/tasks/check_counter.py
qa/workunits/mgr/test_per_module_finisher.sh [new file with mode: 0755]

diff --git a/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml
new file mode 100644 (file)
index 0000000..ec8335f
--- /dev/null
@@ -0,0 +1,13 @@
+tasks:
+  - check-counter:
+      counters:
+        mgr:
+            - name: "finisher-volumes.complete_latency.avgcount"
+              min: 4
+            - name: "finisher-volumes.queue_len"
+              expected_val: 0
+
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_volumes.TestPerModuleFinsherThread
diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml
new file mode 100644 (file)
index 0000000..a2f087a
--- /dev/null
@@ -0,0 +1,43 @@
+tasks:
+  - install:
+  - ceph:
+      wait-for-scrub: false
+  - check-counter:
+      counters:
+        mgr:
+            - name: "finisher-balancer.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-balancer.queue_len"
+              expected_val: 0
+            - name: "finisher-crash.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-crash.queue_len"
+              expected_val: 0
+            - name: "finisher-devicehealth.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-devicehealth.queue_len"
+              expected_val: 0
+            - name: "finisher-iostat.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-iostat.queue_len"
+              expected_val: 0
+            - name: "finisher-pg_autoscaler.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-pg_autoscaler.queue_len"
+              expected_val: 0
+            - name: "finisher-progress.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-progress.queue_len"
+              expected_val: 0
+            - name: "finisher-status.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-status.queue_len"
+              expected_val: 0
+            - name: "finisher-telemetry.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-telemetry.queue_len"
+              expected_val: 0
+  - workunit:
+      clients:
+        client.0:
+          - mgr/test_per_module_finisher.sh
diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml
index 6074de0edf360bf1e70e4cabab008f3d7ec15538..791adc27298b9670d65d417ac1b163a62a2d97f7 100644 (file)
@@ -13,4 +13,4 @@ tasks:
   - workunit:
       clients:
         client.0:
-          - mgr
\ No newline at end of file
+          - mgr/test_localpool.sh
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
index 7f17fe9e485b2664300a690ecce07c4377766d59..11fc238f5528adeea9aa97fb1fcb4341ff1b62e2 100644 (file)
@@ -7805,3 +7805,29 @@ class TestMisc(TestVolumesHelper):
 
         # remove group
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+class TestPerModuleFinsherThread(TestVolumesHelper):
+    """
+    Per module finisher thread tests related to mgr/volume cmds.
+    This is used in conjunction with check_counter, with min val being 4
+    as at least four subvolume cmds are run.
+    """
+    def test_volumes_module_finisher_thread(self):
+        subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+        group = self._generate_random_group_name()
+
+        # create group
+        self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+        # create subvolumes in group
+        self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group)
+
+        self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+        # verify trash dir is clean
+        self._wait_for_trash_empty()
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
index 6219cb0d8e386fce42f5486f2c487cebcb902ff1..40818f3f475d228eeff1f37ff98d27261934ae80 100644 (file)
@@ -5,6 +5,8 @@ import json
 from teuthology.task import Task
 from teuthology import misc
 
+from tasks import ceph_manager
+
 log = logging.getLogger(__name__)
 
 
@@ -35,6 +37,11 @@ class CheckCounter(Task):
                     min: 3
     - workunit: ...
     """
+    @property
+    def admin_remote(self):
+        first_mon = misc.get_first_mon(self.ctx, None)
+        (result,) = self.ctx.cluster.only(first_mon).remotes.keys()
+        return result
 
     def start(self):
         log.info("START")
@@ -50,6 +57,10 @@ class CheckCounter(Task):
         if cluster_name is None:
             cluster_name = next(iter(self.ctx.managers.keys()))
 
+
+        mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager'))
+        active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
+
         for daemon_type, counters in targets.items():
             # List of 'a', 'b', 'c'...
             daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
@@ -64,6 +75,8 @@ class CheckCounter(Task):
                 if not daemon.running():
                     log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
                     continue
+                elif daemon_type == 'mgr' and daemon_id != active_mgr:
+                    continue
                 else:
                     log.debug("Getting stats from {0}".format(daemon_id))
 
@@ -76,10 +89,15 @@ class CheckCounter(Task):
                     log.warning("No admin socket response from {0}, skipping".format(daemon_id))
                     continue
 
+                minval = ''
+                expected_val = ''
                 for counter in counters:
                     if isinstance(counter, dict):
                         name = counter['name']
-                        minval = counter['min']
+                        if 'min' in counter:
+                            minval = counter['min']
+                        if 'expected_val' in counter:
+                            expected_val = counter['expected_val']
                     else:
                         name = counter
                         minval = 1
@@ -96,7 +114,9 @@ class CheckCounter(Task):
 
                     if val is not None:
                         log.info(f"Daemon {daemon_type}.{daemon_id} {name}={val}")
-                        if val >= minval:
+                        if isinstance(minval, int) and val >= minval:
+                            seen.add(name)
+                        elif isinstance(expected_val, int) and val == expected_val:
                             seen.add(name)
 
             if not dry_run:
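
In short, with this change a counter entry in the check-counter config may specify either min (the observed value must be at least that) or expected_val (the observed value must equal it exactly), and for mgr counters only the active mgr's stats are consulted. A simplified stand-alone sketch of that per-counter decision, with illustrative names and defaults that are not part of the commit (the task above tracks the two values across the counter list rather than per call):

# Sketch of the per-counter decision added above: 'min' is a lower bound,
# 'expected_val' is an exact match; anything else does not mark the counter as seen.
def counter_satisfied(val, minval=None, expected_val=None):
    if isinstance(minval, int):
        return val >= minval
    if isinstance(expected_val, int):
        return val == expected_val
    return False

# e.g. finisher-volumes.complete_latency.avgcount with min: 4
assert counter_satisfied(6, minval=4)
# e.g. finisher-volumes.queue_len with expected_val: 0
assert counter_satisfied(0, expected_val=0)
assert not counter_satisfied(1, expected_val=0)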
diff --git a/qa/workunits/mgr/test_per_module_finisher.sh b/qa/workunits/mgr/test_per_module_finisher.sh
new file mode 100755 (executable)
index 0000000..09937bc
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the per module finisher stats for enabled modules
+# using check counter (qa/tasks/check_counter.py).
+
+# 'balancer' commands
+ceph balancer pool ls
+
+# 'crash' commands
+ceph crash ls
+ceph crash ls-new
+
+# 'device' commands
+ceph device query-daemon-health-metrics mon.a
+
+# 'iostat' command
+ceph iostat &
+pid=$!
+sleep 3
+kill -SIGTERM $pid
+
+# 'pg_autoscaler' command
+ceph osd pool autoscale-status
+
+# 'progress' command
+ceph progress
+ceph progress json
+
+# 'status' commands
+ceph fs status
+ceph osd status
+
+# 'telemetry' commands
+ceph telemetry status
+
+echo OK
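
Once this workunit has exercised each module, the thresholds in per_module_finisher_stats.yaml should hold on the active mgr. A rough stand-alone spot check for a single module, illustrative only: the mgr id "x" is an assumption (substitute the active mgr) and "balancer" is used purely as an example.

# Rough spot check (not part of this commit): after 'ceph balancer pool ls',
# the balancer finisher should have completed at least one item and drained
# its queue, matching the thresholds in per_module_finisher_stats.yaml.
import json
import subprocess

perf = json.loads(subprocess.check_output(
    ["ceph", "daemon", "mgr.x", "perf", "dump"]))      # "x": assumed mgr id
balancer = perf["finisher-balancer"]
assert balancer["complete_latency"]["avgcount"] >= 1   # min: 1 in the yaml
assert balancer["queue_len"] == 0                      # expected_val: 0 in the yaml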