From: Ilya Dryomov
Date: Wed, 24 Jul 2024 06:44:46 +0000 (+0200)
Subject: qa/suites/krbd: stress test for recovering from watch errors for -o exclusive
X-Git-Tag: v19.1.1~52^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=448e260dd5637512126a6b1a3234d7dc56eca6e9;p=ceph.git

qa/suites/krbd: stress test for recovering from watch errors for -o exclusive

This is based on a test added in commit 237aa221ebad ("qa/suites/krbd:
stress test for recovering from watch errors") for regular mappings.

Fixes: https://tracker.ceph.com/issues/67097
Signed-off-by: Ilya Dryomov
(cherry picked from commit 8fee41da8b8cd250bbbd8490604193c0864c1295)
---

diff --git a/qa/suites/krbd/singleton/tasks/krbd_watch_errors_exclusive.yaml b/qa/suites/krbd/singleton/tasks/krbd_watch_errors_exclusive.yaml
new file mode 100644
index 0000000000000..aeab129ed7ecf
--- /dev/null
+++ b/qa/suites/krbd/singleton/tasks/krbd_watch_errors_exclusive.yaml
@@ -0,0 +1,19 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        osd pool default size: 1
+      osd:
+        osd shutdown pgref assert: true
+roles:
+- [mon.a, mgr.x, osd.0, client.0]
+
+tasks:
+- install:
+    extra_system_packages:
+    - fio
+- ceph:
+- workunit:
+    clients:
+      all:
+      - rbd/krbd_watch_errors_exclusive.sh
diff --git a/qa/workunits/rbd/krbd_watch_errors_exclusive.sh b/qa/workunits/rbd/krbd_watch_errors_exclusive.sh
new file mode 100755
index 0000000000000..e0b9586ec66f8
--- /dev/null
+++ b/qa/workunits/rbd/krbd_watch_errors_exclusive.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -ex
+set -o pipefail
+
+readonly IMAGE_NAME="watch-errors-exclusive-test"
+
+rbd create -s 1G --image-feature exclusive-lock,object-map "${IMAGE_NAME}"
+
+# induce a watch error every 30 seconds
+dev="$(sudo rbd device map -o exclusive,osdkeepalive=60 "${IMAGE_NAME}")"
+dev_id="${dev#/dev/rbd}"
+
+sudo dmesg -C
+
+# test that a workload doesn't encounter EIO errors
+fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \
+    --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \
+    --time_based --runtime=1h --eta=never
+
+num_errors="$(dmesg | grep -c "rbd${dev_id}: encountered watch error")"
+echo "Recorded ${num_errors} watch errors"
+
+sudo rbd device unmap "${dev}"
+
+if ((num_errors < 60)); then
+    echo "Too few watch errors"
+    exit 1
+fi
+
+echo OK