git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks: capture CommandCrashedError when running nvme list cmd [69163/head]
author: Redouane Kachach <rkachach@ibm.com>
Fri, 29 May 2026 09:09:44 +0000 (11:09 +0200)
committer: Redouane Kachach <rkachach@ibm.com>
Fri, 29 May 2026 09:22:01 +0000 (11:22 +0200)
The safe_while retry loop does not catch exceptions, so a
CommandCrashedError from `nvme list` bypasses it entirely. Catch
CommandCrashedError and continue the retry loop instead.

Fixes: https://tracker.ceph.com/issues/76984
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
qa/tasks/nvme_loop.py

index fdec467a16d247046cedd2c384b8b4ff8b91c993..1aef38bf51fa84a849cf87ac41af40ef59ca957d 100644 (file)
@@ -5,6 +5,7 @@ import json
 from io import StringIO
 from teuthology import misc as teuthology
 from teuthology import contextutil
+from teuthology.exceptions import CommandCrashedError
 from teuthology.orchestra import run
 
 
@@ -68,7 +69,17 @@ def task(ctx, config):
         with contextutil.safe_while(sleep=1, tries=15) as proceed:
             while proceed():
                 remote.run(args=['lsblk'], stdout=StringIO())
-                p = remote.run(args=['sudo', 'nvme', 'list', '-o', 'json'], stdout=StringIO())
+                try:
+                    p = remote.run(
+                        args=['sudo', 'nvme', 'list', '-o', 'json'],
+                        stdout=StringIO(),
+                    )
+                except CommandCrashedError:
+                    log.warning(
+                        'nvme list -o json command failed, retrying...'
+                    )
+                    continue
+
                 new_devs = []
                 # `nvme list -o json` will return one of the following output:
                 '''{