]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks/nvmeof.py: retry do_check if gw in CREATED
author Vallari Agrawal <vallari.agrawal@ibm.com>
Fri, 13 Mar 2026 08:32:06 +0000 (14:02 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Sat, 14 Mar 2026 05:38:00 +0000 (11:08 +0530)
In do_check(), ensure that all the namespaces and listeners have been
added to the gateway (i.e. the gateway is no longer in the CREATED state)
after the gateway is restarted. This prevents moving on to the
next iteration of thrashing while the gateways are still being
updated.

Fixes: https://tracker.ceph.com/issues/75382
Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
qa/tasks/nvmeof.py

index a417cfa988ff082ab4f87cad6c83eb32898ce4f7..e5108a53c0dde6904097e14f4bb5f0dc729441fd 100644 (file)
@@ -1,6 +1,7 @@
 import logging
 import random
 import time
+import json
 from collections import defaultdict
 from datetime import datetime
 from textwrap import dedent
@@ -393,6 +394,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
         """
         self.log('display and verify stats:')
         max_retry = 5
+        retry_delay = 30
         for retry in range(1, max_retry+1):
             try: 
                 random_gateway_host = None
@@ -404,7 +406,14 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 random_gateway_host.run(args=['ceph', 'orch', 'ps', '--daemon-type', 'nvmeof'])
                 random_gateway_host.run(args=['ceph', 'health', 'detail'])
                 random_gateway_host.run(args=['ceph', '-s'])
-                random_gateway_host.run(args=['ceph', 'nvme-gw', 'show', 'mypool', 'mygroup0'])
+
+                gw_show = random_gateway_host.sh('ceph nvme-gw show mypool mygroup0')
+                gw_show_json = json.loads(gw_show)
+                if gw_show_json["num-namespaces"] > 30:
+                    retry_delay = int(gw_show_json["num-namespaces"]) / 3
+                if '"CREATED"' in gw_show:
+                    raise Exception("Some gateway is in CREATED state - in middle of restart")
+
                 initiator_host.run(args=['sudo', 'nvme', 'list'])
                 for dev in self.devices:
                     device_check_cmd = [
@@ -417,7 +426,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 self.log(f"retry do_checks() for {retry} time")
                 if retry == max_retry:
                     raise
-                time.sleep(30) # blocking wait
+                time.sleep(retry_delay) # blocking wait
 
     def switch_task(self):
         """