From 043a330ed0e02a411ed9e95b78329428b7d287ec Mon Sep 17 00:00:00 2001
From: Joseph Sawaya
Date: Mon, 21 Mar 2022 15:02:53 -0400
Subject: [PATCH] Remove orchestrator from rook task and suite

This commit removes the orchestrator commands from the Rook task and
the Rook test suite, because the Rook orchestrator is no longer
maintained and its CLI is obsolete.

This should also help clarify the issue:
https://tracker.ceph.com/issues/53680

Signed-off-by: Joseph Sawaya
---
 qa/suites/orch/rook/smoke/1-rook.yaml         |  6 --
 .../rook/smoke/2-workload/radosbench.yaml     |  7 --
 qa/suites/orch/rook/smoke/3-final.yaml        | 46 -----------
 qa/tasks/rook.py                              | 81 +++++++------------
 4 files changed, 29 insertions(+), 111 deletions(-)
 delete mode 100644 qa/suites/orch/rook/smoke/3-final.yaml

diff --git a/qa/suites/orch/rook/smoke/1-rook.yaml b/qa/suites/orch/rook/smoke/1-rook.yaml
index e990b2d51d0..8182845e9d9 100644
--- a/qa/suites/orch/rook/smoke/1-rook.yaml
+++ b/qa/suites/orch/rook/smoke/1-rook.yaml
@@ -1,8 +1,2 @@
 tasks:
 - rook:
-- rook.shell:
-  - ceph -s
-  - ceph orch status
-  - ceph orch ps
-  - ceph orch ls
-  - ceph orch device ls
diff --git a/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml
index 8cfd6d9b1b7..fd71605c815 100644
--- a/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml
+++ b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml
@@ -3,10 +3,3 @@ tasks:
    host.a:
 - radosbench:
     clients: [client.a]
-- rook.shell:
-    commands:
-    - |
-      ceph orch host label add `hostname` foo
-      ceph orch host ls | grep foo
-      ceph orch host label rm `hostname` foo
-      ceph orch host ls | grep -v foo
diff --git a/qa/suites/orch/rook/smoke/3-final.yaml b/qa/suites/orch/rook/smoke/3-final.yaml
deleted file mode 100644
index 27d8a04e84e..00000000000
--- a/qa/suites/orch/rook/smoke/3-final.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-tasks:
-- exec:
-    host.a:
-      - |
-        set -ex
-        toolbox() {
-          kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- "$@"
-        }
-        orig_num_osd=`toolbox ceph osd stat | cut -f3 -d " "`
-        toolbox ceph orch osd rm 0 --force
-        removed_pv=""
-        while [ "$removed_pv" = "" ]
-        do
-          removed_pv=`kubectl get pv | grep Released | cut -f1 -d " "`
-          sleep 3s
-        done
-        target_path=`kubectl get pv $removed_pv -o jsonpath='{.spec.local.path}'`
-        host=`echo $removed_pv | cut -f1 -d "-"`
-        toolbox ceph orch device zap $host $target_path --force
-        zap_completion="0"
-        while [ "$zap_completion" = "0" ]
-        do
-          zap_completion=`kubectl get job -n rook-ceph rook-ceph-device-zap -o jsonpath='{.status.succeeded.path}'`
-          sleep 3s
-        done
-        kubectl patch pv $removed_pv -p '{"spec":{"claimRef": null}}'
-        toolbox ceph orch apply osd --all-available-devices
-        kubectl delete job rook-ceph-device-zap -n rook-ceph
-        num_osd="0"
-        while [ "$num_osd" != "$orig_num_osd" ]
-        do
-          echo "waiting for osd to come back up"
-          num_osd=`toolbox ceph osd stat | cut -f3 -d " "`
-          sleep 30s
-        done
-- rook.shell:
-    commands:
-    - ceph orch status
-    - ceph orch ps
-    - ceph orch ls
-    - ceph orch host ls
-    - ceph orch device ls
-    - ceph orch apply rgw foo
-    - ceph orch apply mds foo
-    - ceph orch apply rbd-mirror
-    - ceph orch apply nfs foo --port 12777
diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py
index c4e1dffe967..427f8324e30 100644
--- a/qa/tasks/rook.py
+++ b/qa/tasks/rook.py
@@ -367,6 +367,30 @@ def rook_cluster(ctx, config):
                 'count': num_hosts,
                 'allowMultiplePerNode': True,
             },
+            'storage': {
+                'storageClassDeviceSets': [
+                    {
+                        'name': 'scratch',
+                        'count': num_devs,
+                        'portable': False,
+                        'volumeClaimTemplates': [
+                            {
+                                'metadata': {'name': 'data'},
+                                'spec': {
+                                    'resources': {
+                                        'requests': {
+                                            'storage': '10Gi'  # <= (lte) the actual PV size
+                                        }
+                                    },
+                                    'storageClassName': 'scratch',
+                                    'volumeMode': 'Block',
+                                    'accessModes': ['ReadWriteOnce'],
+                                },
+                            },
+                        ],
+                    }
+                ],
+            },
         }
     }
     teuthology.deep_merge(cluster['spec'], config.get('spec', {}))
@@ -433,6 +457,11 @@ def rook_toolbox(ctx, config):
         ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-tools'],
         stdout=BytesIO(),
     )
+    _kubectl(
+        ctx, config,
+        ['-n', 'rook-ceph', 'get', 'pods'],
+        stdout=BytesIO(),
+    )
     for line in p.stdout.getvalue().decode('utf-8').strip().splitlines():
         name, ready, status, _ = line.split(None, 3)
         if status == 'Running':
@@ -452,39 +481,6 @@ def rook_toolbox(ctx, config):
         ], check_status=False)
 
 
-@contextlib.contextmanager
-def wait_for_orch(ctx, config):
-    log.info('Waiting for mgr/rook orchestrator to be available')
-    with safe_while(sleep=10, tries=90, action="check orch status") as proceed:
-        while proceed():
-            p = _shell(ctx, config, ['ceph', 'orch', 'status', '-f', 'json'],
-                       stdout=BytesIO(),
-                       check_status=False)
-            if p.exitstatus == 0:
-                r = json.loads(p.stdout.getvalue().decode('utf-8'))
-                if r.get('available') and r.get('backend') == 'rook':
-                    log.info(' mgr/rook orchestrator is active')
-                    break
-
-    yield
-
-
-@contextlib.contextmanager
-def rook_post_config(ctx, config):
-    try:
-        _shell(ctx, config, ['ceph', 'config', 'set', 'mgr', 'mgr/rook/storage_class',
-               'scratch'])
-        _shell(ctx, config, ['ceph', 'orch', 'apply', 'osd', '--all-available-devices'])
-        yield
-
-    except Exception as e:
-        log.exception(e)
-        raise
-
-    finally:
-        pass
-
-
 @contextlib.contextmanager
 def wait_for_osds(ctx, config):
     cluster_name = config.get('cluster', 'ceph')
@@ -635,8 +631,6 @@ def task(ctx, config):
         lambda: ceph_log(ctx, config),
         lambda: rook_cluster(ctx, config),
         lambda: rook_toolbox(ctx, config),
-        lambda: wait_for_orch(ctx, config),
-        lambda: rook_post_config(ctx, config),
         lambda: wait_for_osds(ctx, config),
         lambda: ceph_config_keyring(ctx, config),
         lambda: ceph_clients(ctx, config),
@@ -657,21 +651,4 @@ def task(ctx, config):
             yield
 
     finally:
-        to_remove = []
-        ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO())
-        if ret.exitstatus == 0:
-            r = json.loads(ret.stdout.getvalue().decode('utf-8'))
-            for service in r:
-                if service['service_type'] in ['rgw', 'mds', 'nfs', 'rbd-mirror']:
-                    _shell(ctx, config, ['ceph', 'orch', 'rm', service['service_name']])
-                    to_remove.append(service['service_name'])
-            with safe_while(sleep=10, tries=90, action="waiting for service removal") as proceed:
-                while proceed():
-                    ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO())
-                    if ret.exitstatus == 0:
-                        r = json.loads(ret.stdout.getvalue().decode('utf-8'))
-                        still_up = [service['service_name'] for service in r]
-                        matches = set(still_up).intersection(to_remove)
-                        if not matches:
-                            break
         log.info('Tearing down rook')
-- 
2.47.3
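
Note on the resulting flow: with wait_for_orch() and rook_post_config() removed, OSDs are
created by the storageClassDeviceSets block added to the CephCluster spec rather than by
`ceph orch apply osd --all-available-devices`, and the run is gated only by the pre-existing
wait_for_osds task, whose body this patch does not touch. The sketch below is illustrative
only, not the actual wait_for_osds implementation: the helper name wait_for_osd_count, the
use of `ceph osd stat -f json`, and the expected-count argument are assumptions. It shows
how OSD readiness can be polled through rook.py's module-local _shell() helper without any
`ceph orch` command:

    import json
    from io import BytesIO

    from teuthology.contextutil import safe_while


    def wait_for_osd_count(ctx, config, expected):
        # Illustrative sketch: poll `ceph osd stat` via the rook.py _shell()
        # helper (which runs the command inside the toolbox pod) until at
        # least `expected` OSDs report up. safe_while raises if it runs out
        # of tries, failing the task.
        with safe_while(sleep=10, tries=90, action='wait for osds') as proceed:
            while proceed():
                p = _shell(ctx, config, ['ceph', 'osd', 'stat', '-f', 'json'],
                           stdout=BytesIO(), check_status=False)
                if p.exitstatus != 0:
                    continue
                r = json.loads(p.stdout.getvalue().decode('utf-8'))
                if r.get('num_up_osds', 0) >= expected:
                    return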