- cephadm.wait_for_service:
service: nvmeof.mypool.mygroup0
+## Enable gateway in DEBUG mode
+# - cephadm.exec:
+# host.a:
+# - ceph orch ls nvmeof --export > /tmp/nvmeof-orig.yaml
+# - "sed 's/log_level:\ INFO/log_level:\ DEBUG/g' /tmp/nvmeof-orig.yaml > /tmp/nvmeof-debug.yaml"
+# - cat /tmp/nvmeof-debug.yaml
+# - ceph orch apply -i /tmp/nvmeof-debug.yaml
+# - ceph orch redeploy nvmeof.mypool.mygroup0
+
+# - cephadm.wait_for_service:
+# service: nvmeof.mypool.mygroup0
+
- workunit:
no_coverage_and_limits: true
timeout: 30m
source /etc/ceph/nvmeof.env
if [ -z "$GATEWAYS" ]; then
- echo "At least one gateway needs to be defined for scalability test"
+ echo "[nvmeof.scale] At least one gateway needs to be defined for scalability test"
exit 1
fi
status_checks() {
+ status_checks_ $1
+ if [ $? -eq 0 ]; then
+ echo "[nvmeof.scale] Verified successfully that everything is working with $1 gateways"
+ else
+ echo "[nvmeof.scale] Verification failed!"
+ sudo dmesg -T > $TESTDIR/archive/dmesg-scalability_test.log
+ exit 1
+ fi
+}
+
+status_checks_() {
expected_count=$1
+ echo "[nvmeof.scale] Verifying that everything is working with $expected_count gateways"
output=$(ceph nvme-gw show $POOL $GROUP)
# nvme_show=$(echo $output | grep -o '"AVAILABLE"' | wc -l)
num_namespaces=$(echo "$gw" | jq '.["num-namespaces"]')
if [[ "$availability" != "AVAILABLE" ]]; then
- echo "Gateway $gw_id is not AVAILABLE."
- exit 1
+ echo "[nvmeof.scale] Gateway $gw_id is not AVAILABLE."
+ return 1
fi
diff=$((num_namespaces - expected_avg_ns))
if [[ $diff -lt -1 || $diff -gt 1 ]]; then
- echo "Gateway $gw_id has num-namespaces ($num_namespaces), expected around $expected_ns_count. Indicates a problem in ns load-balancing."
- exit 1
+ echo "[nvmeof.scale] Gateway $gw_id has num-namespaces ($num_namespaces), expected around $expected_avg_ns. Indicates a problem in ns load-balancing."
+ return 1
fi
done
orch_ls=$(ceph orch ls)
if ! echo "$orch_ls" | grep -q "$expected_count/$expected_count"; then
+ echo "[nvmeof.scale] Expected $expected_count running gateways in 'ceph orch ls'"
return 1
fi
output=$(ceph orch ps --service-name nvmeof.$POOL.$GROUP)
orch_ps=$(echo $output | grep -o 'running' | wc -l)
if [ "$orch_ps" -ne "$expected_count" ]; then
+ echo "[nvmeof.scale] Expected $expected_count running gateways in 'ceph orch ps', but found $orch_ps"
return 1
fi
scaled_down_gateways_count=$(( total_gateways_count - $(echo "$GATEWAYS" | tr -cd ',' | wc -c) - 1 ))
-echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}"
+echo "[nvmeof.scale] SCALE DOWN: Setting up config to remove gateways ${GATEWAYS}"
ceph orch ls --service-name nvmeof.$POOL.$GROUP --export > /tmp/nvmeof-gw.yaml
ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml
cat /tmp/nvmeof-gw.yaml
sed "/$pattern/d" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml
cat /tmp/nvmeof-gw-new.yaml
-echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}"
+echo "[nvmeof.scale] SCALE DOWN: Starting scale testing by removing ${GATEWAYS}"
status_checks $total_gateways_count
ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale
-ceph orch redeploy nvmeof.$POOL.$GROUP
+# ceph orch redeploy nvmeof.$POOL.$GROUP
sleep $DELAY
status_checks $scaled_down_gateways_count
-echo "[nvmeof.scale] Downscale complete - removed gateways (${GATEWAYS}); now scaling back up"
+echo "[nvmeof.scale] SCALE DOWN successful! Removed gateways (${GATEWAYS}) and verified;"
+echo "[nvmeof.scale] SCALE UP: scaling up to $total_gateways_count gateways (from $scaled_down_gateways_count gateways)"
ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale
-ceph orch redeploy nvmeof.$POOL.$GROUP
+# ceph orch redeploy nvmeof.$POOL.$GROUP
sleep $DELAY
status_checks $total_gateways_count
-
+echo "[nvmeof.scale] SCALE UP successful! All gateways running and verified."
echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}"