From 7cb3e58ecba037d21d76e417ee352a997fd028f0 Mon Sep 17 00:00:00 2001 From: dparmar18 Date: Mon, 13 Feb 2023 20:02:06 +0530 Subject: [PATCH] qa: fix cluster creation failure in test_nfs.py Also adds a function _nfs_complete_cmd() that returns the process object so that stdout/stderr can be used for evaluation (_nfs_cmd() uses raw_cluster_cmd(), which returns just stdout, and it became difficult to time cluster creation errors in _test_create_cluster()). It takes some time to update the cluster data, therefore running the command set (check nfs server status -> nfs cluster create test -> check cluster status) in a loop (at most ten iterations with a sleep of 4 secs at each iteration, i.e. safe_while(sleep=4, tries=10)) fixes the issue. Fixes: https://tracker.ceph.com/issues/58744 Signed-off-by: Dhairya Parmar --- qa/tasks/cephfs/test_nfs.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py index 556ecabcc6eac..1fe727dbb0635 100644 --- a/qa/tasks/cephfs/test_nfs.py +++ b/qa/tasks/cephfs/test_nfs.py @@ -3,7 +3,7 @@ import errno import json import time import logging -from io import BytesIO +from io import BytesIO, StringIO from tasks.mgr.mgr_test_case import MgrTestCase from teuthology import contextutil @@ -21,6 +21,12 @@ class TestNFS(MgrTestCase): def _nfs_cmd(self, *args): return self._cmd("nfs", *args) + def _nfs_complete_cmd(self, cmd): + return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}", + stdout=StringIO(), + stderr=StringIO(), + check_status=False) + def _orch_cmd(self, *args): return self._cmd("orch", *args) @@ -145,11 +151,24 @@ class TestNFS(MgrTestCase): ''' Test single nfs cluster deployment. ''' - # Disable any running nfs ganesha daemon - self._check_nfs_server_status() - self._nfs_cmd('cluster', 'create', self.cluster_id) - # Check for expected status and daemon name (nfs.) 
- self._check_nfs_cluster_status('running', 'NFS Ganesha cluster deployment failed') + with contextutil.safe_while(sleep=4, tries=10) as proceed: + while proceed(): + try: + # Disable any running nfs ganesha daemon + self._check_nfs_server_status() + cluster_create = self._nfs_complete_cmd( + f'cluster create {self.cluster_id}') + if cluster_create.stderr and 'cluster already exists' \ + in cluster_create.stderr.getvalue(): + self._test_delete_cluster() + continue + # Check for expected status and daemon name + # (nfs.) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster deployment failed') + break + except (AssertionError, CommandFailedError) as e: + log.warning(f'{e}, retrying') def _test_delete_cluster(self): ''' -- 2.39.5