From 1b3454914ee6be7973f8391e1fb7d0d67d9092ce Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Mon, 15 Jan 2024 15:25:02 +0100 Subject: [PATCH] mgr/rook: adding some basic rook e2e testing Fixes: https://tracker.ceph.com/issues/64029 Signed-off-by: Redouane Kachach --- .../cluster-on-pvc-minikube.yaml | 198 ++++++++++++++++++ .../rook/ci/scripts/bootstrap-rook-cluster.sh | 65 +++--- .../mgr/rook/ci/tests/features/rook.feature | 59 +++++- .../ci/tests/features/steps/implementation.py | 16 +- .../mgr/rook/ci/tests/features/steps/utils.py | 23 ++ 5 files changed, 328 insertions(+), 33 deletions(-) create mode 100644 src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml diff --git a/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml b/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml new file mode 100644 index 0000000000000..2732286aba0c6 --- /dev/null +++ b/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml @@ -0,0 +1,198 @@ +################################################################################################################# +# Define the settings for the rook-ceph cluster with settings for a minikube cluster with a single node + +# This example expects a single node minikube cluster with three extra disks: vdb, vdc and vdd. Please modify +# it according to your environment. See the documentation for more details on storage settings available. 
+ +# For example, to create the cluster: +# kubectl create -f crds.yaml -f common.yaml -f operator.yaml +# kubectl create -f cluster-on-pvc-minikube.yaml +################################################################################################################# +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: local-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +--- +kind: PersistentVolume +apiVersion: v1 +metadata: + name: local0-0 +spec: + storageClassName: local-storage + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + # PV for mon must be a filesystem volume. + volumeMode: Filesystem + local: + # To use dm devices like logical volume, please replace `/dev/sdb` with their device names like `/dev/vg-name/lv-name`. + path: /dev/vdb + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - minikube +--- +kind: PersistentVolume +apiVersion: v1 +metadata: + name: local0-1 +spec: + storageClassName: local-storage + capacity: + storage: 20Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + # PV for mon must be a filesystem volume. + volumeMode: Block + local: + # To use dm devices like logical volume, please replace `/dev/sdb` with their device names like `/dev/vg-name/lv-name`. + path: /dev/vdc + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - minikube +--- +kind: PersistentVolume +apiVersion: v1 +metadata: + name: local0-2 +spec: + storageClassName: local-storage + capacity: + storage: 20Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + # PV for mon must be a filesystem volume. + volumeMode: Block + local: + # To use dm devices like logical volume, please replace `/dev/sdb` with their device names like `/dev/vg-name/lv-name`. 
+ path: /dev/vdd + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - minikube +--- +kind: PersistentVolume +apiVersion: v1 +metadata: + name: local0-3 +spec: + storageClassName: local-storage + capacity: + storage: 20Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + # PV for mon must be a filesystem volume. + volumeMode: Block + local: + # To use dm devices like logical volume, please replace `/dev/sdb` with their device names like `/dev/vg-name/lv-name`. + path: /dev/vde + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - minikube +--- +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + name: my-cluster + namespace: rook-ceph # namespace:cluster +spec: + dataDirHostPath: /var/lib/rook + mon: + count: 1 + allowMultiplePerNode: true + volumeClaimTemplate: + spec: + storageClassName: local-storage + resources: + requests: + storage: 10Gi + mgr: + count: 1 + modules: + - name: pg_autoscaler + enabled: true + dashboard: + enabled: true + ssl: false + crashCollector: + disable: false + cephVersion: + image: quay.io/ceph/daemon-base:latest-main + allowUnsupported: true + skipUpgradeChecks: false + continueUpgradeAfterChecksEvenIfNotHealthy: false + storage: + storageClassDeviceSets: + - name: set1 + count: 3 + portable: false + tuneDeviceClass: true + tuneFastDeviceClass: false + encrypted: false + placement: + preparePlacement: + volumeClaimTemplates: + - metadata: + name: data + # if you are looking at giving your OSD a different CRUSH device class than the one detected by Ceph + # annotations: + # crushDeviceClass: hybrid + spec: + resources: + requests: + storage: 20Gi + # IMPORTANT: Change the storage class depending on your environment + storageClassName: local-storage + volumeMode: Block + accessModes: + - ReadWriteOnce + # when onlyApplyOSDPlacement is false, will merge 
both placement.All() and storageClassDeviceSets.Placement + onlyApplyOSDPlacement: false + priorityClassNames: + mon: system-node-critical + osd: system-node-critical + mgr: system-cluster-critical + disruptionManagement: + managePodBudgets: true + osdMaintenanceTimeout: 30 + pgHealthCheckTimeout: 0 + cephConfig: + global: + mon_warn_on_pool_no_redundancy: "false" +--- +apiVersion: ceph.rook.io/v1 +kind: CephBlockPool +metadata: + name: builtin-mgr + namespace: rook-ceph # namespace:cluster +spec: + name: .mgr + failureDomain: osd + replicated: + size: 1 + requireSafeReplicaSize: false diff --git a/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh b/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh index eb4f9fb66ce72..48181708201c3 100755 --- a/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh +++ b/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh @@ -3,7 +3,10 @@ set -eEx : ${CEPH_DEV_FOLDER:=${PWD}} +CLUSTER_SPEC=${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci/cluster-specs/cluster-on-pvc-minikube.yaml +DEFAULT_NS="rook-ceph" KUBECTL="minikube kubectl --" +export ROOK_CLUSTER_NS="${ROOK_CLUSTER_NS:=$DEFAULT_NS}" ## CephCluster namespace # We build a local ceph image that contains the latest code # plus changes from the PR. 
This image will be used by the docker @@ -27,14 +30,14 @@ setup_minikube_env() { fi rm -rf ~/.minikube - minikube start --memory="4096" --cpus="2" --disk-size=10g --extra-disks=1 --driver kvm2 + minikube start --disk-size=20g --extra-disks=4 --driver kvm2 # point Docker env to use docker daemon running on minikube eval $(minikube docker-env -p minikube) } build_ceph_image() { - wget -q -O cluster-test.yaml https://raw.githubusercontent.com/rook/rook/master/deploy/examples/cluster-test.yaml - CURR_CEPH_IMG=$(grep -E '^\s*image:\s+' cluster-test.yaml | sed 's/.*image: *\([^ ]*\)/\1/') + + CURR_CEPH_IMG=$(grep -E '^\s*image:\s+' $CLUSTER_SPEC | sed 's/.*image: *\([^ ]*\)/\1/') cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/rook/ci mkdir -p tmp_build/rook @@ -54,28 +57,39 @@ build_ceph_image() { } create_rook_cluster() { - wget -q -O cluster-test.yaml https://raw.githubusercontent.com/rook/rook/master/deploy/examples/cluster-test.yaml $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/crds.yaml $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/common.yaml $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/operator.yaml - $KUBECTL create -f cluster-test.yaml - $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/dashboard-external-http.yaml + $KUBECTL create -f $CLUSTER_SPEC $KUBECTL create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/toolbox.yaml } +is_operator_ready() { + local phase + phase=$($KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}') + echo "PHASE: $phase" + [[ "$phase" == "Ready" ]] +} + wait_for_rook_operator() { local max_attempts=10 local sleep_interval=20 local attempts=0 + $KUBECTL rollout status deployment rook-ceph-operator -n rook-ceph --timeout=180s - PHASE=$($KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o 
jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}') - echo "PHASE: $PHASE" - while ! $KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}' | grep -q "Ready"; do - echo "Waiting for cluster to be ready..." - sleep $sleep_interval - attempts=$((attempts+1)) + + while ! is_operator_ready; do + echo "Waiting for rook operator to be ready..." + sleep $sleep_interval + + # log current cluster state and pods info for debugging + PHASE=$($KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.phase}') + $KUBECTL -n rook-ceph get pods + + attempts=$((attempts + 1)) if [ $attempts -ge $max_attempts ]; then echo "Maximum number of attempts ($max_attempts) reached. Exiting..." + $KUBECTL -n rook-ceph get pods | grep operator | awk '{print $1}' | xargs $KUBECTL -n rook-ceph logs return 1 fi done @@ -87,7 +101,7 @@ wait_for_ceph_cluster() { local attempts=0 $KUBECTL rollout status deployment rook-ceph-tools -n rook-ceph --timeout=90s while ! 
$KUBECTL get cephclusters.ceph.rook.io -n rook-ceph -o jsonpath='{.items[?(@.kind == "CephCluster")].status.ceph.health}' | grep -q "HEALTH_OK"; do -        echo "Waiting for Ceph cluster installed" +        echo "Waiting for Ceph cluster to enter HEALTH_OK state" sleep $sleep_interval attempts=$((attempts+1)) if [ $attempts -ge $max_attempts ]; then @@ -96,18 +110,9 @@ wait_for_ceph_cluster() { fi done echo "Ceph cluster installed and running" -} -show_info() { - DASHBOARD_PASSWORD=$($KUBECTL -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 --decode && echo) - IP_ADDR=$($KUBECTL get po --selector="app=rook-ceph-mgr" -n rook-ceph --output jsonpath='{.items[*].status.hostIP}') - PORT="$($KUBECTL -n rook-ceph -o=jsonpath='{.spec.ports[?(@.name == "dashboard")].nodePort}' get services rook-ceph-mgr-dashboard-external-http)" - BASE_URL="http://$IP_ADDR:$PORT" - echo "===========================" - echo "Ceph Dashboard: " - echo " IP_ADDRESS: $BASE_URL" - echo " PASSWORD: $DASHBOARD_PASSWORD" - echo "===========================" + # add an additional wait to cover any subtle change in the state + sleep 20 } configure_libvirt(){ @@ -141,13 +146,21 @@ recreate_default_network(){ # restart libvirtd service and wait a little bit for the service sudo systemctl restart libvirtd - sleep 20 + sleep 10 # Just some debugging information all_networks=$(virsh net-list --all) groups=$(groups) } +enable_rook_orchestrator() { + echo "Enabling rook orchestrator" + $KUBECTL rollout status deployment rook-ceph-tools -n "$ROOK_CLUSTER_NS" --timeout=90s + $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph mgr module enable rook + $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch set backend rook + $KUBECTL -n "$ROOK_CLUSTER_NS" exec -it deploy/rook-ceph-tools -- ceph orch status +} + #################################################################### 
#################################################################### @@ -160,7 +173,7 @@ build_ceph_image create_rook_cluster wait_for_rook_operator wait_for_ceph_cluster -show_info +enable_rook_orchestrator #################################################################### #################################################################### diff --git a/src/pybind/mgr/rook/ci/tests/features/rook.feature b/src/pybind/mgr/rook/ci/tests/features/rook.feature index ae0478f8b9cd7..acf733f55b494 100644 --- a/src/pybind/mgr/rook/ci/tests/features/rook.feature +++ b/src/pybind/mgr/rook/ci/tests/features/rook.feature @@ -1,8 +1,8 @@ Feature: Testing Rook orchestrator commands - Ceph has been installed using the cluster CRD available in deploy/examples/cluster-test.yaml and + Ceph has been installed using the cluster CRD available in deploy/examples/cluster-test.yaml Scenario: Verify ceph cluster health - When I run + When I run ceph command """ ceph health | grep HEALTH """ @@ -10,3 +10,58 @@ Feature: Testing Rook orchestrator commands """ HEALTH_OK """ + + Scenario: Verify rook orchestrator has been enabled correctly + When I run ceph command + """ + ceph mgr module ls | grep rook + """ + Then I get something like + """ + rook +on + """ + + Scenario: Verify rook orchestrator lists services correctly + When I run ceph command + """ + ceph orch ls + """ + Then I get something like + """ + NAME +PORTS +RUNNING +REFRESHED +AGE +PLACEMENT + crash +1/1 .+ + mgr +1/1 .+ + mon +1/1 .+ + osd +3 .+ + """ + + Scenario: Verify rook orchestrator lists daemons correctly + When I run ceph command + """ + ceph orch ps + """ + Then I get something like + """ + NAME +HOST +PORTS +STATUS +REFRESHED +AGE +MEM +USE +MEM +LIM +VERSION +IMAGE +ID + ceph-exporter.exporter +minikube +running .+ + crashcollector.crash +minikube +running .+ + mgr.a +minikube +running .+ + mon.a +minikube +running .+ + osd.0 +minikube +running .+ + osd.1 +minikube +running .+ + osd.2 +minikube +running .+ + """ + 
+ Scenario: Verify rook orchestrator lists devices correctly + When I run ceph command + """ + ceph orch device ls + """ + Then I get something like + """ + HOST +PATH +TYPE +DEVICE +ID +SIZE +AVAILABLE +REFRESHED +REJECT +REASONS + minikube +/dev/vdb +unknown +None +10.0G .+ + minikube +/dev/vdc +unknown +None +20.0G .+ + minikube +/dev/vdd +unknown +None +20.0G .+ + minikube +/dev/vde +unknown +None +20.0G .+ + """ diff --git a/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py b/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py index adde61afd384b..69dcde458855d 100644 --- a/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py +++ b/src/pybind/mgr/rook/ci/tests/features/steps/implementation.py @@ -2,14 +2,19 @@ from behave import * from utils import * import re -@when("I run") +@when("I run ceph command") def run_step(context): - context.output = run_commands(context.text) + context.output = run_ceph_commands(context.text) + +@when("I run k8s command") +def run_step(context): + context.output = run_k8s_commands(context.text) @then("I get") def verify_result_step(context): - print(f"Output is:\n{context.output}\n--------------\n") - assert context.text == context.output + if (context.text != context.output): + display_side_by_side(context.text, context.output) + assert context.text == context.output, "" @then("I get something like") def verify_fuzzy_result_step(context): @@ -18,4 +23,5 @@ def verify_fuzzy_result_step(context): num_lines = min(len(output_lines), len(expected_lines)) for n in range(num_lines): if not re.match(expected_lines[n], output_lines[n]): - raise + display_side_by_side(expected_lines[n], output_lines[n]) + assert False, "" diff --git a/src/pybind/mgr/rook/ci/tests/features/steps/utils.py b/src/pybind/mgr/rook/ci/tests/features/steps/utils.py index 41a71d0fb1fc9..f711ec3fe6ca3 100644 --- a/src/pybind/mgr/rook/ci/tests/features/steps/utils.py +++ b/src/pybind/mgr/rook/ci/tests/features/steps/utils.py @@ -1,4 
+1,5 @@ import subprocess +from difflib import unified_diff ROOK_CEPH_COMMAND = "minikube kubectl -- -n rook-ceph exec -it deploy/rook-ceph-tools -- " CLUSTER_COMMAND = "minikube kubectl -- " @@ -27,3 +28,25 @@ def run_commands(commands: str) -> str: output = execute_command(command) return output.strip("\n") + +def run_k8s_commands(commands: str) -> str: + commands_list = commands.split("\n") + output = "" + for cmd in commands_list: + command = CLUSTER_COMMAND + cmd + output = execute_command(command) + + return output.strip("\n") + +def run_ceph_commands(commands: str) -> str: + commands_list = commands.split("\n") + output = "" + for cmd in commands_list: + command = ROOK_CEPH_COMMAND + cmd + output = execute_command(command) + + return output.strip("\n") + +def display_side_by_side(expected, got): + diff = unified_diff(expected.splitlines(), got.splitlines(), lineterm='') + print('\n'.join(diff)) -- 2.39.5