From: Vallari Agrawal Date: Tue, 28 Jan 2025 09:18:15 +0000 (+0530) Subject: qa/tasks/nvmeof.py: Fix do_checks() method X-Git-Tag: v20.0.0~263^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7dfd3d36e9e3ea9c97e85c8c0dcd74d8ef1a7329;p=ceph.git qa/tasks/nvmeof.py: Fix do_checks() method All checks currently run on the initiator node; now run all "ceph" commands on one of the gateway hosts instead of the initiator nodes, and run the "nvme list" and "nvme list-subsys" checks on the initiator node. Add retry (5 times) to do_checks if any command fails. Signed-off-by: Vallari Agrawal --- diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py index 33cb51b8c01f..aeca32f1b657 100644 --- a/qa/tasks/nvmeof.py +++ b/qa/tasks/nvmeof.py @@ -346,24 +346,29 @@ class NvmeofThrasher(Thrasher, Greenlet): Run some checks to see if everything is running well during thrashing. """ self.log('display and verify stats:') - for d in self.daemons: - d.remote.sh(d.status_cmd, check_status=False) - check_cmd = [ - 'ceph', 'orch', 'ls', '--refresh', - run.Raw('&&'), 'ceph', 'orch', 'ps', '--daemon-type', 'nvmeof', '--refresh', - run.Raw('&&'), 'ceph', 'health', 'detail', - run.Raw('&&'), 'ceph', '-s', - run.Raw('&&'), 'ceph', 'nvme-gw', 'show', 'mypool', 'mygroup0', - run.Raw('&&'), 'sudo', 'nvme', 'list', - ] - self.checker_host.run(args=check_cmd).wait() - - for dev in self.devices: - device_check_cmd = [ - 'sudo', 'nvme', 'list-subsys', dev, - run.Raw('|'), 'grep', 'live optimized' - ] - self.checker_host.run(args=device_check_cmd) + for retry in range(5): + try: + random_gateway_host = None + initiator_host = self.checker_host + for d in self.daemons: + random_gateway_host = d.remote + d.remote.sh(d.status_cmd, check_status=False) + random_gateway_host.run(args=['ceph', 'orch', 'ls', '--refresh']) + random_gateway_host.run(args=['ceph', 'orch', 'ps', '--daemon-type', 'nvmeof', '--refresh']) + random_gateway_host.run(args=['ceph', 'health', 'detail']) + 
random_gateway_host.run(args=['ceph', '-s']) + random_gateway_host.run(args=['ceph', 'nvme-gw', 'show', 'mypool', 'mygroup0']) + + initiator_host.run(args=['sudo', 'nvme', 'list']) + for dev in self.devices: + device_check_cmd = [ + 'sudo', 'nvme', 'list-subsys', dev, + run.Raw('|'), 'grep', 'live optimized' + ] + initiator_host.run(args=device_check_cmd) + break + except run.CommandFailedError: + self.log(f"retry do_checks() for {retry} time") def switch_task(self): """