From 7b7e5d4221389ce077780450d78a4a1e6a3d0a43 Mon Sep 17 00:00:00 2001 From: Jos Collin Date: Thu, 29 Feb 2024 16:20:03 +0530 Subject: [PATCH] qa: enhance labeled perf counters test for cephfs-mirror Implements checks for labeled perf counters in the appropriate tests. This patch verifies snaps_synced, snaps_renamed, snaps_deleted and sync_failures metrics are updated correctly based on the tests. Fixes: https://tracker.ceph.com/issues/64486 Signed-off-by: Jos Collin --- qa/tasks/cephfs/test_mirroring.py | 111 +++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py index bad2d2daffdc2..6e57df5d0a846 100644 --- a/qa/tasks/cephfs/test_mirroring.py +++ b/qa/tasks/cephfs/test_mirroring.py @@ -546,6 +546,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + first = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -554,6 +558,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + second = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(second["counters"]["snaps_synced"], first["counters"]["snaps_synced"]) + # some more IO self.mount_a.run_shell(["mkdir", "d0/d00"]) self.mount_a.run_shell(["mkdir", "d0/d01"]) @@ -569,6 +578,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap1', 2) self.verify_snapshot('d0', 'snap1') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + third = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(third["counters"]["snaps_synced"], second["counters"]["snaps_synced"]) + # delete a snapshot self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) @@ -577,6 +591,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap0' not in snap_list) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fourth["counters"]["snaps_deleted"], third["counters"]["snaps_deleted"]) # rename a snapshot self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) @@ -587,6 +605,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap2' in snap_list) self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_renamed + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fifth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fifth["counters"]["snaps_renamed"], fourth["counters"]["snaps_renamed"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -626,6 +648,12 @@ class TestMirroring(CephFSTestCase): snap_list = self.mount_b.ls(path='d0/.snap') self.assertTrue('snap0' not in snap_list) + + # check sync_failures + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vmirror_peers["counters"]["sync_failures"], 0) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_restart_sync_on_blocklist(self): @@ -655,6 +683,10 @@ class TestMirroring(CephFSTestCase): # fetch rados address for blacklist check rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -683,6 +715,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -691,6 +727,10 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # add a non-existent directory for synchronization self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') @@ -707,6 +747,10 @@ class TestMirroring(CephFSTestCase): time.sleep(120) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_service_daemon_status(self): @@ -760,7 +804,7 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring_module() # enable mirroring through mon interface -- this should result in the mirror daemon - # failing to enable mirroring due to absence of `cephfs_mirorr` index object. + # failing to enable mirroring due to absence of `cephfs_mirror` index object. self.run_ceph_cmd("fs", "mirror", "enable", self.primary_fs_name) with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: @@ -888,6 +932,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -896,6 +944,10 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -907,12 +959,20 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create snapshots in parent directories self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) @@ -924,12 +984,20 @@ class TestMirroring(CephFSTestCase): time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) time.sleep(15) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_deleted"], vthird["counters"]["snaps_deleted"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1033,6 +1101,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1040,6 +1111,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(5, 20) @@ -1052,6 +1127,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) # diff again, this time back to HEAD log.debug('resetting to HEAD') @@ -1063,6 +1141,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) self.verify_snapshot(repo_path, 'snap_c') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_synced"], vthird["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1133,11 +1214,18 @@ class TestMirroring(CephFSTestCase): while turns != len(typs): snapname = f'snap_{turns}' cleanup_and_create_with_type('d0', fnames) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', snapname, turns+1) verify_types('d0', fnames, snapname) + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + # next type typs.rotate(1) turns += 1 @@ -1184,6 +1272,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1191,6 +1282,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(60, 100) @@ -1207,6 +1301,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1265,6 +1362,9 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] # take snapshots log.debug('taking snapshots') self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -1326,6 +1426,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d2', 'snap0', 1) self.verify_snapshot('d2', 'snap0') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + self.assertGreater(vafter["counters"]["snaps_deleted"], vbefore["counters"]["snaps_deleted"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1354,6 +1458,11 @@ class TestMirroring(CephFSTestCase): time.sleep(60) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/l1', 'snap0', 1) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + snaps_synced = vmirror_peers["counters"]["snaps_synced"] + self.assertEqual(snaps_synced, 1, f"Mismatch snaps_synced: {snaps_synced} vs 1") mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() -- 2.39.5