git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: fix cluster creation failure in test_nfs.py
author dparmar18 <dparmar@redhat.com>
Mon, 13 Feb 2023 14:32:06 +0000 (20:02 +0530)
committer Dhairya Parmar <dparmar@redhat.com>
Fri, 31 Mar 2023 08:12:47 +0000 (13:42 +0530)
Also adds a function _nfs_complete_cmd() that returns the process object so that stdout/stderr
can be used for evaluation (_nfs_cmd() uses raw_cluster_cmd(), which returns just stdout,
and it became difficult to time cluster creation errors in _test_create_cluster()).

It takes some time to update the cluster data, therefore running the command set
(check nfs server status -> nfs cluster create test -> check cluster status) in
a loop (up to 10 tries with a 4-second sleep between tries) fixes the issue.

Fixes: https://tracker.ceph.com/issues/58744
Signed-off-by: Dhairya Parmar <dparmar@redhat.com>
(cherry picked from commit 7cb3e58ecba037d21d76e417ee352a997fd028f0)

qa/tasks/cephfs/test_nfs.py

index 556ecabcc6eacf0484f3ee3c285ce9e643f8f2d5..1fe727dbb06352463452112c63efb3e9e154f62e 100644 (file)
@@ -3,7 +3,7 @@ import errno
 import json
 import time
 import logging
-from io import BytesIO
+from io import BytesIO, StringIO
 
 from tasks.mgr.mgr_test_case import MgrTestCase
 from teuthology import contextutil
@@ -21,6 +21,12 @@ class TestNFS(MgrTestCase):
     def _nfs_cmd(self, *args):
         return self._cmd("nfs", *args)
 
+    def _nfs_complete_cmd(self, cmd):
+        return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}",
+                                                            stdout=StringIO(),
+                                                            stderr=StringIO(),
+                                                            check_status=False)
+
     def _orch_cmd(self, *args):
         return self._cmd("orch", *args)
 
@@ -145,11 +151,24 @@ class TestNFS(MgrTestCase):
         '''
         Test single nfs cluster deployment.
         '''
-        # Disable any running nfs ganesha daemon
-        self._check_nfs_server_status()
-        self._nfs_cmd('cluster', 'create', self.cluster_id)
-        # Check for expected status and daemon name (nfs.<cluster_id>)
-        self._check_nfs_cluster_status('running', 'NFS Ganesha cluster deployment failed')
+        with contextutil.safe_while(sleep=4, tries=10) as proceed:
+            while proceed():
+                try:
+                    # Disable any running nfs ganesha daemon
+                    self._check_nfs_server_status()
+                    cluster_create = self._nfs_complete_cmd(
+                        f'cluster create {self.cluster_id}')
+                    if cluster_create.stderr and 'cluster already exists' \
+                            in cluster_create.stderr.getvalue():
+                        self._test_delete_cluster()
+                        continue
+                    # Check for expected status and daemon name
+                    # (nfs.<cluster_id>)
+                    self._check_nfs_cluster_status(
+                        'running', 'NFS Ganesha cluster deployment failed')
+                    break
+                except (AssertionError, CommandFailedError) as e:
+                    log.warning(f'{e}, retrying')
 
     def _test_delete_cluster(self):
         '''