From 0bf55d6a8bb38942a33220ee6ab2890fb557476e Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Fri, 29 May 2026 11:09:44 +0200 Subject: [PATCH] qa/tasks: capture CommandCrashedError when running nvme list cmd The safe_while retry loop does not catch exceptions, so a CommandCrashedError from `nvme list` bypasses it entirely. Catch CommandCrashedError and continue the retry loop instead. Fixes: https://tracker.ceph.com/issues/76984 Signed-off-by: Redouane Kachach --- qa/tasks/nvme_loop.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py index fdec467a16d2..1aef38bf51fa 100644 --- a/qa/tasks/nvme_loop.py +++ b/qa/tasks/nvme_loop.py @@ -5,6 +5,7 @@ import json from io import StringIO from teuthology import misc as teuthology from teuthology import contextutil +from teuthology.exceptions import CommandCrashedError from teuthology.orchestra import run @@ -68,7 +69,17 @@ def task(ctx, config): with contextutil.safe_while(sleep=1, tries=15) as proceed: while proceed(): remote.run(args=['lsblk'], stdout=StringIO()) - p = remote.run(args=['sudo', 'nvme', 'list', '-o', 'json'], stdout=StringIO()) + try: + p = remote.run( + args=['sudo', 'nvme', 'list', '-o', 'json'], + stdout=StringIO(), + ) + except CommandCrashedError: + log.warning( + 'nvme list -o json command failed, retrying...' + ) + continue + new_devs = [] # `nvme list -o json` will return one of the following output: '''{ -- 2.47.3