git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks/nvmeof.py: retry do_check if gw in CREATED
author: Vallari Agrawal <vallari.agrawal@ibm.com>
Fri, 13 Mar 2026 08:32:06 +0000 (14:02 +0530)
committer: Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 1 Apr 2026 05:21:47 +0000 (10:51 +0530)
In do_check(), ensure all the namespaces+listeners are
added to the gateway (i.e. the gateway is not in CREATED state)
after the gateway is restarted. This prevents going into the
next iteration of thrashing while gateways are still being
updated.

Fixes: https://tracker.ceph.com/issues/75382
Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
(cherry picked from commit 86ac2893fbf92bf51014a8f059bc562131421d99)

qa/tasks/nvmeof.py

index af13719c19ee396b50c4619f2f7adfec1c7790e8..93fd85a9a04d0a155c4ae05903be63f86ac57bd6 100644 (file)
@@ -1,6 +1,7 @@
 import logging
 import random
 import time
+import json
 from collections import defaultdict
 from datetime import datetime
 from textwrap import dedent
@@ -382,6 +383,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
         """
         self.log('display and verify stats:')
         max_retry = 5
+        retry_delay = 30
         for retry in range(1, max_retry+1):
             try: 
                 random_gateway_host = None
@@ -393,7 +395,14 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 random_gateway_host.run(args=['ceph', 'orch', 'ps', '--daemon-type', 'nvmeof'])
                 random_gateway_host.run(args=['ceph', 'health', 'detail'])
                 random_gateway_host.run(args=['ceph', '-s'])
-                random_gateway_host.run(args=['ceph', 'nvme-gw', 'show', 'mypool', 'mygroup0'])
+
+                gw_show = random_gateway_host.sh('ceph nvme-gw show mypool mygroup0')
+                gw_show_json = json.loads(gw_show)
+                if gw_show_json["num-namespaces"] > 30:
+                    retry_delay = int(gw_show_json["num-namespaces"]) / 3
+                if '"CREATED"' in gw_show:
+                    raise Exception("Some gateway is in CREATED state - in middle of restart")
+
                 initiator_host.run(args=['sudo', 'nvme', 'list'])
                 for dev in self.devices:
                     device_check_cmd = [
@@ -406,7 +415,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
                 self.log(f"retry do_checks() for {retry} time")
                 if retry == max_retry:
                     raise
-                time.sleep(30) # blocking wait
+                time.sleep(retry_delay) # blocking wait
 
     def switch_task(self):
         """