qa/tasks/rook: test reapplication of drive groups stored in mgr (43139/head)
author     Joseph Sawaya <jsawaya@redhat.com>
           Wed, 8 Sep 2021 15:33:14 +0000 (11:33 -0400)
committer  Joseph Sawaya <jsawaya@redhat.com>
           Thu, 16 Dec 2021 23:17:29 +0000 (18:17 -0500)
This commit adds testing for the drive_group_loop in the Rook orchestrator,
which reapplies previously applied drive groups.

The test removes an OSD, zaps the underlying device, then waits for the OSD
to be re-created by the drive_group_loop.

This commit also updates the rook test suite to test v1.7.2 instead of v1.7.0,
since `orch device zap` is only supported from v1.7.2 onwards.

Fixes: https://tracker.ceph.com/issues/53501
Signed-off-by: Joseph Sawaya <jsawaya@redhat.com>
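
For context, the drive_group_loop exercised here is the loop in the Rook
orchestrator module that reapplies the drive group specs stored in the mgr, so
that an OSD removed out-of-band is eventually re-created. A minimal sketch of
the idea, with hypothetical names (stored_drive_groups, apply_drive_group and
shutdown_event are placeholders, not the module's actual API):

    import time

    def drive_group_loop(module, interval=60):
        # Hypothetical sketch: periodically re-apply every stored drive group
        # so that OSDs removed out-of-band are re-created by the orchestrator.
        while not module.shutdown_event.is_set():
            for name, drive_group in module.stored_drive_groups.items():
                try:
                    module.apply_drive_group(drive_group)  # placeholder call
                except Exception:
                    module.log.exception('failed to re-apply drive group %s', name)
            time.sleep(interval)
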
qa/suites/orch/rook/smoke/3-final.yaml
qa/suites/orch/rook/smoke/rook/1.7.0.yaml [deleted file]
qa/suites/orch/rook/smoke/rook/1.7.2.yaml [new file with mode: 0644]
qa/tasks/kubeadm.py
qa/tasks/rook.py
src/pybind/mgr/rook/rook_cluster.py

diff --git a/qa/suites/orch/rook/smoke/3-final.yaml b/qa/suites/orch/rook/smoke/3-final.yaml
index ece8469fab44b972fbd8d1a3c62a8d7d15463a55..27d8a04e84eb2a4cfdd39a6907249b947758c4ce 100644 (file)
@@ -1,4 +1,38 @@
 tasks:
+- exec:
+    host.a:
+      - |
+        set -ex
+        toolbox() {
+            kubectl -n rook-ceph exec -it deploy/rook-ceph-tools --  "$@"
+        }
+        orig_num_osd=`toolbox ceph osd stat | cut -f3 -d " "`
+        toolbox ceph orch osd rm 0 --force
+        removed_pv=""
+        while [ "$removed_pv" = "" ]
+        do
+            removed_pv=`kubectl get pv | grep Released | cut -f1 -d " "`
+            sleep 3s
+        done
+        target_path=`kubectl get pv $removed_pv -o jsonpath='{.spec.local.path}'`
+        host=`echo $removed_pv | cut -f1 -d "-"`
+        toolbox ceph orch device zap $host $target_path --force
+        zap_completion="0"
+        while [ "$zap_completion" = "0"  ]
+        do
+            zap_completion=`kubectl get job -n rook-ceph rook-ceph-device-zap -o jsonpath='{.status.succeeded.path}'`
+            sleep 3s
+        done
+        kubectl patch pv $removed_pv -p '{"spec":{"claimRef": null}}'
+        toolbox ceph orch apply osd --all-available-devices
+        kubectl delete job rook-ceph-device-zap -n rook-ceph
+        num_osd="0"
+        while [ "$num_osd" != "$orig_num_osd" ]
+        do
+            echo "waiting for osd to come back up"
+            num_osd=`toolbox ceph osd stat | cut -f3 -d " "`
+            sleep 30s
+        done
 - rook.shell:
     commands:
       - ceph orch status
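
The block added above drives the whole scenario from host.a: record the OSD
count, remove OSD 0, wait for its PV to show up as Released, zap the underlying
device through the orchestrator, wait for the rook-ceph-device-zap job to report
success, clear the PV's claimRef, re-apply the all-available-devices OSD spec,
and poll until the OSD count is back to the original value. For illustration,
the same polling pattern sketched in Python (toolbox_ceph and
wait_for_osd_count are hypothetical helpers, not part of qa/tasks; the
namespace and deployment name are taken from the script above):

    import subprocess
    import time

    def toolbox_ceph(*args):
        # Run a ceph command inside the Rook toolbox, as the shell helper does.
        cmd = ['kubectl', '-n', 'rook-ceph', 'exec', 'deploy/rook-ceph-tools',
               '--', 'ceph'] + list(args)
        return subprocess.run(cmd, check=True, capture_output=True,
                              text=True).stdout

    def wait_for_osd_count(expected, interval=30):
        # Poll `ceph osd stat` until the OSD count field (the same field the
        # shell script cuts out) matches the expected value.
        while True:
            num_osd = int(toolbox_ceph('osd', 'stat').split()[2])
            if num_osd == expected:
                return
            time.sleep(interval)
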
diff --git a/qa/suites/orch/rook/smoke/rook/1.7.0.yaml b/qa/suites/orch/rook/smoke/rook/1.7.0.yaml
deleted file mode 100644 (file)
index 702d3bf..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-overrides:
-  rook:
-    rook_image: rook/ceph:v1.7.0
-    rook_branch: v1.7.0
diff --git a/qa/suites/orch/rook/smoke/rook/1.7.2.yaml b/qa/suites/orch/rook/smoke/rook/1.7.2.yaml
new file mode 100644 (file)
index 0000000..de96c58
--- /dev/null
@@ -0,0 +1,4 @@
+overrides:
+  rook:
+    rook_image: rook/ceph:v1.7.2
+    rook_branch: v1.7.2
diff --git a/qa/tasks/kubeadm.py b/qa/tasks/kubeadm.py
index b212d06d8f0b90690731b99837823a7ad1a9c85f..dae9f6b2c95255112868a1fd41b30d4ad9a4a310 100644 (file)
@@ -468,7 +468,7 @@ def setup_pvs(ctx, config):
                     'volumeMode': 'Block',
                     'accessModes': ['ReadWriteOnce'],
                     'capacity': {'storage': '100Gi'},  # doesn't matter?
-                    'persistentVolumeReclaimPolicy': 'Recycle',
+                    'persistentVolumeReclaimPolicy': 'Retain',
                     'storageClassName': 'scratch',
                     'local': {'path': dev},
                     'nodeAffinity': {
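
Changing the reclaim policy from Recycle to Retain matters for the new test:
with Retain, the PV backing the removed OSD stays in the Released state (which
the test greps for) instead of being made Available again automatically, and it
only becomes reusable once the test clears its claimRef after the zap. A rough
equivalent of that last step with the Kubernetes Python client, for
illustration only (the test itself just uses kubectl patch; assumes a
kubeconfig is available):

    from kubernetes import client, config

    def release_pv(pv_name):
        # Drop the claimRef so a Released PV can be bound again, mirroring
        # `kubectl patch pv $removed_pv -p '{"spec":{"claimRef": null}}'`.
        config.load_kube_config()
        client.CoreV1Api().patch_persistent_volume(
            pv_name, {'spec': {'claimRef': None}})
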
diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py
index 15e2f04518962ec3fc005bfe31ddb3ff9476aa27..c4e1dffe967bca94df9a0b9439d07ca1abd1f9f5 100644 (file)
@@ -22,6 +22,14 @@ from tasks.cephadm import update_archive_setting
 
 log = logging.getLogger(__name__)
 
+def path_to_examples(ctx, cluster_name : str) -> str:
+    for p in ['rook/deploy/examples/', 'rook/cluster/examples/kubernetes/ceph/']:
+        try: 
+           ctx.rook[cluster_name].remote.get_file(p + 'operator.yaml')
+           return p
+        except:
+            pass 
+    assert False, 'Path to examples not found'
 
 def _kubectl(ctx, config, args, **kwargs):
     cluster_name = config.get('cluster', 'ceph')
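
The new path_to_examples helper probes the two directories the Rook example
manifests have lived in (deploy/examples/ in newer Rook releases,
cluster/examples/kubernetes/ceph/ in older ones) and returns whichever one
actually contains operator.yaml on the remote. A slightly tightened variant,
purely as a sketch (the exception type raised by teuthology's remote.get_file
for a missing path is an assumption here):

    def path_to_examples(ctx, cluster_name: str) -> str:
        # Return the first candidate directory that contains operator.yaml.
        candidates = [
            'rook/deploy/examples/',
            'rook/cluster/examples/kubernetes/ceph/',
        ]
        for p in candidates:
            try:
                ctx.rook[cluster_name].remote.get_file(p + 'operator.yaml')
                return p
            except Exception:  # assumption: get_file raises on a missing path
                continue
        raise RuntimeError('Rook example manifests not found in %s' % candidates)
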
@@ -94,8 +102,12 @@ def rook_operator(ctx, config):
     )
 
     # operator.yaml
+    log.info(os.path.abspath(os.getcwd()))
+    object_methods = [method_name for method_name in dir(ctx.rook[cluster_name].remote)
+                  if callable(getattr(ctx.rook[cluster_name].remote, method_name))]
+    log.info(object_methods)
     operator_yaml = ctx.rook[cluster_name].remote.read_file(
-        'rook/cluster/examples/kubernetes/ceph/operator.yaml'
+        (path_to_examples(ctx, cluster_name) + 'operator.yaml')
     )
     rook_image = config.get('rook_image')
     if rook_image:
@@ -111,8 +123,8 @@ def rook_operator(ctx, config):
         log.info('Deploying operator')
         _kubectl(ctx, config, [
             'create',
-            '-f', 'rook/cluster/examples/kubernetes/ceph/crds.yaml',
-            '-f', 'rook/cluster/examples/kubernetes/ceph/common.yaml',
+            '-f', (path_to_examples(ctx, cluster_name) + 'crds.yaml'),
+            '-f', (path_to_examples(ctx, cluster_name) + 'common.yaml'),
             '-f', 'operator.yaml',
         ])
 
@@ -165,11 +177,11 @@ def rook_operator(ctx, config):
             # fails sometimes when deleting some of the CRDs... not sure why!)
             _kubectl(ctx, config, [
                 'delete',
-                '-f', 'rook/cluster/examples/kubernetes/ceph/common.yaml',
+                '-f', (path_to_examples() + 'common.yaml'),
             ])
             _kubectl(ctx, config, [
                 'delete',
-                '-f', 'rook/cluster/examples/kubernetes/ceph/crds.yaml',
+                '-f', (path_to_examples() + 'crds.yaml'),
             ])
         ctx.rook[cluster_name].remote.run(args=['rm', '-rf', 'rook', 'operator.yaml'])
         if op_job:
@@ -409,7 +421,7 @@ def rook_toolbox(ctx, config):
     try:
         _kubectl(ctx, config, [
             'create',
-            '-f', 'rook/cluster/examples/kubernetes/ceph/toolbox.yaml',
+            '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'),
         ])
 
         log.info('Waiting for tools container to start')
@@ -436,7 +448,7 @@ def rook_toolbox(ctx, config):
     finally:
         _kubectl(ctx, config, [
             'delete',
-            '-f', 'rook/cluster/examples/kubernetes/ceph/toolbox.yaml',
+            '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'),
         ], check_status=False)
 
 
@@ -493,7 +505,6 @@ def wait_for_osds(ctx, config):
 
     yield
 
-
 @contextlib.contextmanager
 def ceph_config_keyring(ctx, config):
     # get config and push to hosts
diff --git a/src/pybind/mgr/rook/rook_cluster.py b/src/pybind/mgr/rook/rook_cluster.py
index ab433e71ca9949b2f107682d3f63439f04035b7a..4662b06143673acfaf994a89aee5c9b6a32d6e06 100644 (file)
@@ -423,7 +423,6 @@ class DefaultCreator():
             ]
             for device in to_create:
                 new_scds = self.device_to_device_set(drive_group, device)
-                new_cluster.spec.storage.storageClassDeviceSets.append(new_scds)
                 if new_scds.name not in existing_scds:
                     new_cluster.spec.storage.storageClassDeviceSets.append(new_scds)
             return new_cluster
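
The removed line appended every new storageClassDeviceSet before the name check
ran, so reapplying a drive group already present in the CephCluster spec would
keep adding duplicate device sets; with only the guarded append left,
reapplication (which the drive_group_loop now does) is idempotent. Stated on
its own, the pattern is just:

    # Standalone illustration of the guarded append kept above; existing_names
    # would be the device-set names already present in the cluster spec.
    def add_device_set(device_sets, existing_names, new_scds):
        if new_scds.name not in existing_names:
            device_sets.append(new_scds)
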
@@ -1187,6 +1186,7 @@ class RookCluster(object):
                                     )
                                 ],
                                 security_context=client.V1SecurityContext(
+                                    run_as_user=0,
                                     privileged=True
                                 ),
                                 volume_mounts=[
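
The final hunk makes the container whose spec is built here (from the
surrounding code, presumably the rook-ceph-device-zap job used by
`orch device zap`) run as root in addition to privileged, which wiping a raw
block device needs. With the Kubernetes Python client the resulting security
context is just (fragment for illustration, matching the fields shown above):

    from kubernetes import client

    # Run as uid 0 and privileged so the container can wipe the block device.
    security_context = client.V1SecurityContext(
        run_as_user=0,
        privileged=True,
    )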