mgr/rook: add basic rook e2e testing
author    Redouane Kachach <rkachach@redhat.com>  Mon, 15 Jan 2024 14:25:02 +0000 (15:25 +0100)
committer Redouane Kachach <rkachach@redhat.com>  Wed, 24 Jan 2024 10:00:54 +0000 (11:00 +0100)
Fixes: https://tracker.ceph.com/issues/64029
Signed-off-by: Redouane Kachach <rkachach@redhat.com>
src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml [new file with mode: 0644]
src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh
src/pybind/mgr/rook/ci/tests/features/rook.feature
src/pybind/mgr/rook/ci/tests/features/steps/implementation.py
src/pybind/mgr/rook/ci/tests/features/steps/utils.py
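For reference, a minimal local run of the new e2e suite could look like the sketch below; this is a hedged example that assumes a kvm2-capable host and that the behave scenarios are invoked from the tests directory, as the step files under features/steps suggest:

    # provision minikube, build the PR's ceph image and bring up the Rook cluster
    ./src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh
    # run the Gherkin scenarios against the running cluster
    cd src/pybind/mgr/rook/ci/tests
    behave features/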

diff --git a/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml b/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml
new file mode 100644 (file)
index 0000000..2732286
--- /dev/null
@@ -0,0 +1,198 @@
+#################################################################################################################
+# Settings for a rook-ceph cluster running on a single-node minikube cluster
+
+# This example expects a single-node minikube cluster with four extra disks: vdb, vdc, vdd and vde. Please modify
+# it according to your environment. See the documentation for more details on the available storage settings.
+
+# For example, to create the cluster:
+#   kubectl create -f crds.yaml -f common.yaml -f operator.yaml
+#   kubectl create -f cluster-on-pvc-minikube.yaml
+#################################################################################################################
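+# A single-node minikube cluster with the extra disks can be created, for example, with:
+#   minikube start --disk-size=20g --extra-disks=4 --driver kvm2
+# (this mirrors how the CI bootstrap script in this change provisions it; adjust the driver to your environment)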
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: local-storage
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: WaitForFirstConsumer
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: local0-0
+spec:
+  storageClassName: local-storage
+  capacity:
+    storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  # PV for mon must be a filesystem volume.
+  volumeMode: Filesystem
+  local:
+    # To use dm devices such as logical volumes, replace `/dev/vdb` with the device name, e.g. `/dev/vg-name/lv-name`.
+    path: /dev/vdb
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                - minikube
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: local0-1
+spec:
+  storageClassName: local-storage
+  capacity:
+    storage: 20Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  # PV for OSD must be a block volume.
+  volumeMode: Block
+  local:
+    # To use dm devices such as logical volumes, replace `/dev/vdc` with the device name, e.g. `/dev/vg-name/lv-name`.
+    path: /dev/vdc
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                - minikube
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: local0-2
+spec:
+  storageClassName: local-storage
+  capacity:
+    storage: 20Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  # PV for OSD must be a block volume.
+  volumeMode: Block
+  local:
+    # To use dm devices such as logical volumes, replace `/dev/vdd` with the device name, e.g. `/dev/vg-name/lv-name`.
+    path: /dev/vdd
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                - minikube
+---
+kind: PersistentVolume
+apiVersion: v1
+metadata:
+  name: local0-3
+spec:
+  storageClassName: local-storage
+  capacity:
+    storage: 20Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  # PV for OSD must be a block volume.
+  volumeMode: Block
+  local:
+    # To use dm devices such as logical volumes, replace `/dev/vde` with the device name, e.g. `/dev/vg-name/lv-name`.
+    path: /dev/vde
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                - minikube
+---
+apiVersion: ceph.rook.io/v1
+kind: CephCluster
+metadata:
+  name: my-cluster
+  namespace: rook-ceph # namespace:cluster
+spec:
+  dataDirHostPath: /var/lib/rook
+  mon:
+    count: 1
+    allowMultiplePerNode: true
+    volumeClaimTemplate:
+      spec:
+        storageClassName: local-storage
+        resources:
+          requests:
+            storage: 10Gi
+  mgr:
+    count: 1
+    modules:
+      - name: pg_autoscaler
+        enabled: true
+  dashboard:
+    enabled: true
+    ssl: false
+  crashCollector:
+    disable: false
+  cephVersion:
+    image: quay.io/ceph/daemon-base:latest-main
+    allowUnsupported: true
+  skipUpgradeChecks: false
+  continueUpgradeAfterChecksEvenIfNotHealthy: false
+  storage:
+    storageClassDeviceSets:
+      - name: set1
+        count: 3
+        portable: false
+        tuneDeviceClass: true
+        tuneFastDeviceClass: false
+        encrypted: false
+        placement:
+        preparePlacement:
+        volumeClaimTemplates:
+          - metadata:
+              name: data
+              # if you want to give your OSD a different CRUSH device class than the one detected by Ceph
+              # annotations:
+              #   crushDeviceClass: hybrid
+            spec:
+              resources:
+                requests:
+                  storage: 20Gi
+              # IMPORTANT: Change the storage class depending on your environment
+              storageClassName: local-storage
+              volumeMode: Block
+              accessModes:
+                - ReadWriteOnce
+    # when onlyApplyOSDPlacement is false, Rook merges placement.All() with storageClassDeviceSets.Placement
+    onlyApplyOSDPlacement: false
+  priorityClassNames:
+    mon: system-node-critical
+    osd: system-node-critical
+    mgr: system-cluster-critical
+  disruptionManagement:
+    managePodBudgets: true
+    osdMaintenanceTimeout: 30
+    pgHealthCheckTimeout: 0
+  cephConfig:
+    global:
+      mon_warn_on_pool_no_redundancy: "false"
+---
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: builtin-mgr
+  namespace: rook-ceph # namespace:cluster
+spec:
+  name: .mgr
+  failureDomain: osd
+  replicated:
+    size: 1
+    requireSafeReplicaSize: false
diff --git a/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh b/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh
index eb4f9fb66ce725beffe6f242d518ea15876d224c..48181708201c33fbf33df694f4c7b273b53f3395 100755 (executable)
@@ -3,7 +3,10 @@
 set -eEx
 
 : ${CEPH_DEV_FOLDER:=${PWD}}
+CLUSTER_SPEC=${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml
+DEFAULT_NS="rook-ceph"
 KUBECTL="minikube kubectl --"
+export ROOK_CLUSTER_NS="${ROOK_CLUSTER_NS:=$DEFAULT_NS}" ## CephCluster namespace
 
 # We build a local ceph image that contains the latest code
 # plus changes from the PR. This image will be used by the docker
@@ -27,14 +30,14 @@ setup_minikube_env() {
     fi
 
     rm -rf ~/.minikube
-    minikube start --memory="4096" --cpus="2" --disk-size=10g --extra-disks=1 --driver kvm2
+    minikube start --disk-size=20g --extra-disks=4 --driver kvm2
     # point Docker env to use docker daemon running on minikube
     eval $(minikube docker-env -p minikube)
 }
 
 build_ceph_image() {
-    wget -q -O cluster-test.yaml https://raw.githubusercontent.com/rook/rook/master/deploy/examples/cluster-test.yaml
-    CURR_CEPH_IMG=$(grep -E '^\s*image:\s+' cluster-test.yaml | sed 's/.*image: *\([^ ]*\)/\1/')
+
+    CURR_CEPH_IMG=$(grep -E '^\s*image:\s+' $CLUSTER_SPEC | sed 's/.*image: *\([^ ]*\)/\1/')
 
     cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci
     mkdir -p tmp_build/rook
@@ -54,28 +57,39 @@ build_ceph_image() {
 }
 
 create_rook_cluster() {
-    wget -q -O cluster-test.yaml https://raw.githubusercontent.com/rook/rook/master/deploy/examples/cluster-test.yaml
     $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/crds.yaml
     $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/common.yaml
     $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/operator.yaml
-    $KUBECTL create -f cluster-test.yaml
-    $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/dashboard-external-http.yaml
+    $KUBECTL create -f $CLUSTER_SPEC
     $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/toolbox.yaml
 }
 
+is_operator_ready() {
+    local phase
+    phase=$($KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}')
+    echo "PHASE: $phase"
+    [[ "$phase" == "Ready" ]]
+}
+
 wait_for_rook_operator() {
     local max_attempts=10
     local sleep_interval=20
     local attempts=0
+
     $KUBECTL rollout status deployment rook-ceph-operator -n rook-ceph --timeout=180s
-    PHASE=$($KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}')
-    echo "PHASE: $PHASE"
-    while ! $KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}' | grep -q "Ready"; do
-       echo "Waiting for cluster to be ready..."
-       sleep $sleep_interval
-       attempts=$((attempts+1))
+
+    while ! is_operator_ready; do
+        echo "Waiting for rook operator to be ready..."
+        sleep $sleep_interval
+
+        # log current pod status for debugging (is_operator_ready already echoes the cluster phase)
+        $KUBECTL -n rook-ceph get pods
+
+        attempts=$((attempts + 1))
         if [ $attempts -ge $max_attempts ]; then
             echo "Maximum number of attempts ($max_attempts) reached. Exiting..."
+            $KUBECTL -n rook-ceph get pods | grep operator | awk '{print $1}' | xargs $KUBECTL -n rook-ceph logs
             return 1
         fi
     done
@@ -87,7 +101,7 @@ wait_for_ceph_cluster() {
     local attempts=0
     $KUBECTL rollout status deployment rook-ceph-tools -n rook-ceph --timeout=90s
     while ! $KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.ceph.health}' | grep -q "HEALTH_OK"; do
-       echo "Waiting for Ceph cluster installed"
+       echo "Waiting for Ceph cluster to enter HEALTH_OK state"
        sleep $sleep_interval
        attempts=$((attempts+1))
         if [ $attempts -ge $max_attempts ]; then
@@ -96,18 +110,9 @@ wait_for_ceph_cluster() {
         fi
     done
     echo "Ceph cluster installed and running"
-}
 
-show_info() {
-    DASHBOARD_PASSWORD=$($KUBECTL -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 --decode && echo)
-    IP_ADDR=$($KUBECTL get po --selector="app=rook-ceph-mgr" -n rook-ceph --output jsonpath='{.items[*].status.hostIP}')
-    PORT="$($KUBECTL -n rook-ceph -o=jsonpath='{.spec.ports[?(@.name == "dashboard")].nodePort}' get services rook-ceph-mgr-dashboard-external-http)"
-    BASE_URL="http://$IP_ADDR:$PORT"
-    echo "==========================="
-    echo "Ceph Dashboard:  "
-    echo "   IP_ADDRESS: $BASE_URL"
-    echo "   PASSWORD: $DASHBOARD_PASSWORD"
-    echo "==========================="
+    # add an additional wait to cover any subtle change in the cluster state
+    sleep 20
 }
 
 configure_libvirt(){
@@ -141,13 +146,21 @@ recreate_default_network(){
 
     # restart libvirtd service and wait a little bit for the service
     sudo systemctl restart libvirtd
-    sleep 20
+    sleep 10
 
     # Just some debugging information
     all_networks=$(virsh net-list --all)
     groups=$(groups)
 }
 
+enable_rook_orchestrator() {
+    echo "Enabling rook orchestrator"
+    $KUBECTL rollout status deployment rook-ceph-tools -n "$ROOK_CLUSTER_NS" --timeout=90s
+    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph mgr module enable rook
+    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch set backend rook
+    $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch status
+}
+
 ####################################################################
 ####################################################################
 
@@ -160,7 +173,7 @@ build_ceph_image
 create_rook_cluster
 wait_for_rook_operator
 wait_for_ceph_cluster
-show_info
+enable_rook_orchestrator
 
 ####################################################################
 ####################################################################
diff --git a/src/pybind/mgr/rook/ci/tests/features/rook.feature b/src/pybind/mgr/rook/ci/tests/features/rook.feature
index ae0478f8b9cd70aa43eebf1d12b6f05f302f05a2..acf733f55b4949e05a26e65a1a5712f60c9317da 100644 (file)
@@ -1,8 +1,8 @@
 Feature: Testing Rook orchestrator commands
-    Ceph has been installed using the cluster CRD available in deploy/examples/cluster-test.yaml and
+    Ceph has been installed using the cluster CRD available in src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml
 
     Scenario: Verify ceph cluster health
-      When I run
+      When I run ceph command
           """
           ceph health | grep HEALTH
           """
@@ -10,3 +10,58 @@ Feature: Testing Rook orchestrator commands
           """
           HEALTH_OK
           """
+
+    Scenario: Verify rook orchestrator has been enabled correctly
+      When I run ceph command
+          """
+          ceph mgr module ls | grep rook
+          """
+      Then I get something like
+          """
+          rook +on
+          """
+
+    Scenario: Verify rook orchestrator lists services correctly
+        When I run ceph command
+            """
+            ceph orch ls
+            """
+        Then I get something like
+            """
+            NAME +PORTS +RUNNING +REFRESHED +AGE +PLACEMENT
+            crash +1/1 .+
+            mgr +1/1 .+
+            mon +1/1 .+
+            osd +3 .+
+            """
+
+    Scenario: Verify rook orchestrator lists daemons correctly
+        When I run ceph command
+            """
+            ceph orch ps
+            """
+        Then I get something like
+            """
+            NAME +HOST +PORTS +STATUS +REFRESHED +AGE +MEM +USE +MEM +LIM +VERSION +IMAGE +ID
+            ceph-exporter.exporter +minikube +running .+
+            crashcollector.crash +minikube +running .+
+            mgr.a +minikube +running .+
+            mon.a +minikube +running .+
+            osd.0 +minikube +running .+
+            osd.1 +minikube +running .+
+            osd.2 +minikube +running .+
+            """
+
+    Scenario: Verify rook orchestrator lists devices correctly
+        When I run ceph command
+            """
+            ceph orch device ls
+            """
+        Then I get something like
+            """
+            HOST +PATH +TYPE +DEVICE +ID +SIZE +AVAILABLE +REFRESHED +REJECT +REASONS
+            minikube +/dev/vdb  +unknown +None +10.0G .+
+            minikube +/dev/vdc  +unknown +None +20.0G .+
+            minikube +/dev/vdd  +unknown +None +20.0G .+
+            minikube +/dev/vde  +unknown +None +20.0G .+
+            """
diff --git a/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py b/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py
index adde61afd384b417566bd06141f7095263869462..69dcde458855d74604c78897783bee9183da65b9 100644 (file)
@@ -2,14 +2,19 @@ from behave import *
 from utils import *
 import re
 
-@when("I run")
+@when("I run ceph command")
 def run_step(context):
-    context.output = run_commands(context.text)
+    context.output = run_ceph_commands(context.text)
+
+@when("I run k8s command")
+def run_step(context):
+    context.output = run_k8s_commands(context.text)
 
 @then("I get")
 def verify_result_step(context):
-    print(f"Output is:\n{context.output}\n--------------\n")
-    assert context.text == context.output
+    if context.text != context.output:
+        display_side_by_side(context.text, context.output)
+    assert context.text == context.output, ""
 
 @then("I get something like")
 def verify_fuzzy_result_step(context):
@@ -18,4 +23,5 @@ def verify_fuzzy_result_step(context):
     num_lines = min(len(output_lines), len(expected_lines))
     for n in range(num_lines):
         if not re.match(expected_lines[n], output_lines[n]):
-            raise
+            display_side_by_side(expected_lines[n], output_lines[n])
+            assert False, ""
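The fuzzy "I get something like" step treats each expected line as a regular expression and checks it against the corresponding output line with re.match, which is what lets the table rows in rook.feature pin down only the column names and counts. A small illustration of that matching (the output row is hypothetical, not real command output):

    import re

    expected = "osd +3 .+"                          # pattern taken from rook.feature
    output   = "osd                    3  10s ago"  # hypothetical `ceph orch ls` row
    assert re.match(expected, output) is not None   # ' +' absorbs the column padding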
diff --git a/src/pybind/mgr/rook/ci/tests/features/steps/utils.py b/src/pybind/mgr/rook/ci/tests/features/steps/utils.py
index 41a71d0fb1fc94b9aa1f9243daadaf1600f82132..f711ec3fe6ca357fb03af4a3acd4f455a434995b 100644 (file)
@@ -1,4 +1,5 @@
 import subprocess
+from difflib import unified_diff
 
 ROOK_CEPH_COMMAND = "minikube kubectl -- -n rook-ceph exec -it deploy/rook-ceph-tools -- "
 CLUSTER_COMMAND = "minikube kubectl -- "
@@ -27,3 +28,25 @@ def run_commands(commands: str) -> str:
         output = execute_command(command)
 
     return output.strip("\n")
+
+def run_k8s_commands(commands: str) -> str:
+    commands_list = commands.split("\n")
+    output = ""
+    for cmd in commands_list:
+        command = CLUSTER_COMMAND + cmd
+        output = execute_command(command)
+
+    return output.strip("\n")
+
+def run_ceph_commands(commands: str) -> str:
+    commands_list = commands.split("\n")
+    output = ""
+    for cmd in commands_list:
+        command = ROOK_CEPH_COMMAND + cmd
+        output = execute_command(command)
+
+    return output.strip("\n")
+
+def display_side_by_side(expected, got):
+    diff = unified_diff(expected.splitlines(), got.splitlines(), lineterm='')
+    print('\n'.join(diff))
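Despite its name, display_side_by_side prints a unified diff of the expected and actual strings, which is what surfaces in the behave output when one of the assertions above fails. A quick usage sketch (the strings are made up):

    display_side_by_side("HEALTH_OK", "HEALTH_WARN")
    # prints something like:
    #   ---
    #   +++
    #   @@ -1 +1 @@
    #   -HEALTH_OK
    #   +HEALTH_WARN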