qa: Add test for per-module finisher thread (51045/head)
author     Kotresh HR <khiremat@redhat.com>
           Fri, 4 Nov 2022 10:36:20 +0000 (16:06 +0530)
committer  Kotresh HR <khiremat@redhat.com>
           Thu, 31 Aug 2023 06:17:54 +0000 (11:47 +0530)
Fixes: https://tracker.ceph.com/issues/51177
Signed-off-by: Kotresh HR <khiremat@redhat.com>
(cherry picked from commit 2c2ef6d56b583a7a27fd7e99359cdabb2fd37fb5)

Conflicts:
  Removed the 'telemetry diff' command used for testing,
  as it is not present in pacific.
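
For context, each enabled mgr module has its own finisher thread, and the per-module perf counters it exposes (for example finisher-volumes.complete_latency.avgcount and finisher-volumes.queue_len) are what the new check-counter configs below assert on. A minimal sketch of inspecting these counters by hand, not part of this commit, assuming the ceph CLI is available and the script runs on the host that holds the active mgr's admin socket:

#!/usr/bin/env python3
# Minimal sketch (not part of this commit): list the per-module finisher
# counters reported by the active mgr via its admin socket.
import json
import subprocess

def ceph_json(*args):
    """Run a ceph command and parse its JSON output."""
    return json.loads(subprocess.check_output(("ceph",) + args))

# Same active-mgr lookup the check_counter.py change below performs via CephManager.
active = ceph_json("mgr", "dump", "--format=json")["active_name"]
perf = ceph_json("daemon", f"mgr.{active}", "perf", "dump")
for name, stats in perf.items():
    if name.startswith("finisher-"):
        print(name,
              "avgcount:", stats["complete_latency"]["avgcount"],
              "queue_len:", stats["queue_len"])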

qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml [new file with mode: 0644]
qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml [new file with mode: 0644]
qa/suites/rados/mgr/tasks/workunits.yaml
qa/tasks/cephfs/test_volumes.py
qa/tasks/check_counter.py
qa/workunits/mgr/test_per_module_finisher.sh [new file with mode: 0755]

diff --git a/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml
new file mode 100644 (file)
index 0000000..ec8335f
--- /dev/null
@@ -0,0 +1,13 @@
+tasks:
+  - check-counter:
+      counters:
+        mgr:
+            - name: "finisher-volumes.complete_latency.avgcount"
+              min: 4
+            - name: "finisher-volumes.queue_len"
+              expected_val: 0
+
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_volumes.TestPerModuleFinsherThread
diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml
new file mode 100644 (file)
index 0000000..a2f087a
--- /dev/null
@@ -0,0 +1,43 @@
+tasks:
+  - install:
+  - ceph:
+      wait-for-scrub: false
+  - check-counter:
+      counters:
+        mgr:
+            - name: "finisher-balancer.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-balancer.queue_len"
+              expected_val: 0
+            - name: "finisher-crash.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-crash.queue_len"
+              expected_val: 0
+            - name: "finisher-devicehealth.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-devicehealth.queue_len"
+              expected_val: 0
+            - name: "finisher-iostat.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-iostat.queue_len"
+              expected_val: 0
+            - name: "finisher-pg_autoscaler.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-pg_autoscaler.queue_len"
+              expected_val: 0
+            - name: "finisher-progress.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-progress.queue_len"
+              expected_val: 0
+            - name: "finisher-status.complete_latency.avgcount"
+              min: 2
+            - name: "finisher-status.queue_len"
+              expected_val: 0
+            - name: "finisher-telemetry.complete_latency.avgcount"
+              min: 1
+            - name: "finisher-telemetry.queue_len"
+              expected_val: 0
+  - workunit:
+      clients:
+        client.0:
+          - mgr/test_per_module_finisher.sh
diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml
index 6074de0edf360bf1e70e4cabab008f3d7ec15538..791adc27298b9670d65d417ac1b163a62a2d97f7 100644 (file)
@@ -13,4 +13,4 @@ tasks:
   - workunit:
       clients:
         client.0:
-          - mgr
\ No newline at end of file
+          - mgr/test_localpool.sh
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
index 7f17fe9e485b2664300a690ecce07c4377766d59..11fc238f5528adeea9aa97fb1fcb4341ff1b62e2 100644 (file)
@@ -7805,3 +7805,29 @@ class TestMisc(TestVolumesHelper):
 
         # remove group
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+class TestPerModuleFinsherThread(TestVolumesHelper):
+    """
+    Per module finisher thread tests related to mgr/volume cmds.
+    This is used in conjunction with check_counter, with min val being 4
+    as at least four subvolume cmds are run.
+    """
+    def test_volumes_module_finisher_thread(self):
+        subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+        group = self._generate_random_group_name()
+
+        # create group
+        self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+        # create subvolumes in group
+        self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group)
+        self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group)
+
+        self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+        self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+        # verify trash dir is clean
+        self._wait_for_trash_empty()
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
index 6219cb0d8e386fce42f5486f2c487cebcb902ff1..40818f3f475d228eeff1f37ff98d27261934ae80 100644 (file)
@@ -5,6 +5,8 @@ import json
 from teuthology.task import Task
 from teuthology import misc
 
+from tasks import ceph_manager
+
 log = logging.getLogger(__name__)
 
 
@@ -35,6 +37,11 @@ class CheckCounter(Task):
                     min: 3
     - workunit: ...
     """
+    @property
+    def admin_remote(self):
+        first_mon = misc.get_first_mon(self.ctx, None)
+        (result,) = self.ctx.cluster.only(first_mon).remotes.keys()
+        return result
 
     def start(self):
         log.info("START")
@@ -50,6 +57,10 @@ class CheckCounter(Task):
         if cluster_name is None:
             cluster_name = next(iter(self.ctx.managers.keys()))
 
+
+        mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager'))
+        active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
+
         for daemon_type, counters in targets.items():
             # List of 'a', 'b', 'c'...
             daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
@@ -64,6 +75,8 @@ class CheckCounter(Task):
                 if not daemon.running():
                     log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
                     continue
+                elif daemon_type == 'mgr' and daemon_id != active_mgr:
+                    continue
                 else:
                     log.debug("Getting stats from {0}".format(daemon_id))
 
@@ -76,10 +89,15 @@ class CheckCounter(Task):
                     log.warning("No admin socket response from {0}, skipping".format(daemon_id))
                     continue
 
+                minval = ''
+                expected_val = ''
                 for counter in counters:
                     if isinstance(counter, dict):
                         name = counter['name']
-                        minval = counter['min']
+                        if 'min' in counter:
+                            minval = counter['min']
+                        if 'expected_val' in counter:
+                            expected_val = counter['expected_val']
                     else:
                         name = counter
                         minval = 1
@@ -96,7 +114,9 @@ class CheckCounter(Task):
 
                     if val is not None:
                         log.info(f"Daemon {daemon_type}.{daemon_id} {name}={val}")
-                        if val >= minval:
+                        if isinstance(minval, int) and val >= minval:
+                            seen.add(name)
+                        elif isinstance(expected_val, int) and val == expected_val:
                             seen.add(name)
 
             if not dry_run:
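
In short, with this change a counter entry in the check-counter config may specify either min (the observed value must be at least that) or expected_val (the observed value must equal it exactly), and for mgr counters only the active mgr's stats are consulted. A simplified stand-alone sketch of that per-counter decision, with illustrative names and defaults that are not part of the commit (the task above tracks the two values across the counter list rather than per call):

# Sketch of the per-counter decision added above: 'min' is a lower bound,
# 'expected_val' is an exact match; anything else does not mark the counter as seen.
def counter_satisfied(val, minval=None, expected_val=None):
    if isinstance(minval, int):
        return val >= minval
    if isinstance(expected_val, int):
        return val == expected_val
    return False

# e.g. finisher-volumes.complete_latency.avgcount with min: 4
assert counter_satisfied(6, minval=4)
# e.g. finisher-volumes.queue_len with expected_val: 0
assert counter_satisfied(0, expected_val=0)
assert not counter_satisfied(1, expected_val=0)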
diff --git a/qa/workunits/mgr/test_per_module_finisher.sh b/qa/workunits/mgr/test_per_module_finisher.sh
new file mode 100755 (executable)
index 0000000..09937bc
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the per module finisher stats for enabled modules
+# using check counter (qa/tasks/check_counter.py).
+
+# 'balancer' commands
+ceph balancer pool ls
+
+# 'crash' commands
+ceph crash ls
+ceph crash ls-new
+
+# 'device' commands
+ceph device query-daemon-health-metrics mon.a
+
+# 'iostat' command
+ceph iostat &
+pid=$!
+sleep 3
+kill -SIGTERM $pid
+
+# 'pg_autoscaler' command
+ceph osd pool autoscale-status
+
+# 'progress' command
+ceph progress
+ceph progress json
+
+# 'status' commands
+ceph fs status
+ceph osd status
+
+# 'telemetry' commands
+ceph telemetry status
+
+echo OK
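
Once this workunit has exercised each module, the thresholds in per_module_finisher_stats.yaml should hold on the active mgr. A rough stand-alone spot check for a single module, illustrative only: the mgr id "x" is an assumption (substitute the active mgr) and "balancer" is used purely as an example.

# Rough spot check (not part of this commit): after 'ceph balancer pool ls',
# the balancer finisher should have completed at least one item and drained
# its queue, matching the thresholds in per_module_finisher_stats.yaml.
import json
import subprocess

perf = json.loads(subprocess.check_output(
    ["ceph", "daemon", "mgr.x", "perf", "dump"]))      # "x": assumed mgr id
balancer = perf["finisher-balancer"]
assert balancer["complete_latency"]["avgcount"] >= 1   # min: 1 in the yaml
assert balancer["queue_len"] == 0                      # expected_val: 0 in the yaml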