From: Redouane Kachach
Date: Fri, 29 May 2026 09:09:44 +0000 (+0200)
Subject: qa/tasks: capture CommandCrashedError when running nvme list cmd
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0bf55d6a8bb38942a33220ee6ab2890fb557476e;p=ceph.git

qa/tasks: capture CommandCrashedError when running nvme list cmd

The safe_while retry loop does not catch exceptions, so a
CommandCrashedError from `nvme list` bypasses it entirely.
Catch CommandCrashedError and continue the retry loop instead.

Fixes: https://tracker.ceph.com/issues/76984
Signed-off-by: Redouane Kachach
---

diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py
index fdec467a16d2..1aef38bf51fa 100644
--- a/qa/tasks/nvme_loop.py
+++ b/qa/tasks/nvme_loop.py
@@ -5,6 +5,7 @@ import json
 from io import StringIO
 from teuthology import misc as teuthology
 from teuthology import contextutil
+from teuthology.exceptions import CommandCrashedError
 from teuthology.orchestra import run
 
 
@@ -68,7 +69,17 @@ def task(ctx, config):
         with contextutil.safe_while(sleep=1, tries=15) as proceed:
             while proceed():
                 remote.run(args=['lsblk'], stdout=StringIO())
-                p = remote.run(args=['sudo', 'nvme', 'list', '-o', 'json'], stdout=StringIO())
+                try:
+                    p = remote.run(
+                        args=['sudo', 'nvme', 'list', '-o', 'json'],
+                        stdout=StringIO(),
+                    )
+                except CommandCrashedError:
+                    log.warning(
+                        'nvme list -o json command failed, retrying...'
+                    )
+                    continue
+
                 new_devs = []
                 # `nvme list -o json` will return one of the following output:
                 '''{