git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/suites/krbd: stress test for recovering from watch errors 53786/head
author Ilya Dryomov <idryomov@gmail.com>
Wed, 27 Sep 2023 18:17:11 +0000 (20:17 +0200)
committer Ilya Dryomov <idryomov@gmail.com>
Tue, 3 Oct 2023 10:18:43 +0000 (12:18 +0200)
Fixes: https://tracker.ceph.com/issues/63010
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 237aa221ebad429457a621d2e38cfdf0025e38f9)

13 files changed:
qa/suites/krbd/singleton/% [new file with mode: 0644]
qa/suites/krbd/singleton/.qa [new symlink]
qa/suites/krbd/singleton/bluestore-bitmap.yaml [new symlink]
qa/suites/krbd/singleton/conf.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/ms_mode$/.qa [new symlink]
qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/ms_mode$/crc.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/ms_mode$/legacy.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/ms_mode$/secure.yaml [new file with mode: 0644]
qa/suites/krbd/singleton/tasks/.qa [new symlink]
qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml [new file with mode: 0644]
qa/workunits/rbd/krbd_watch_errors.sh [new file with mode: 0755]

diff --git a/qa/suites/krbd/singleton/% b/qa/suites/krbd/singleton/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/krbd/singleton/.qa b/qa/suites/krbd/singleton/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/krbd/singleton/bluestore-bitmap.yaml b/qa/suites/krbd/singleton/bluestore-bitmap.yaml
new file mode 120000 (symlink)
index 0000000..a59cf51
--- /dev/null
@@ -0,0 +1 @@
+.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file
diff --git a/qa/suites/krbd/singleton/conf.yaml b/qa/suites/krbd/singleton/conf.yaml
new file mode 100644 (file)
index 0000000..41292fa
--- /dev/null
@@ -0,0 +1,8 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        mon warn on pool no app: false
+        ms die on skipped message: false
+      client:
+        rbd default features: 37
diff --git a/qa/suites/krbd/singleton/ms_mode$/.qa b/qa/suites/krbd/singleton/ms_mode$/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml
new file mode 100644 (file)
index 0000000..4d27d01
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      client:
+        rbd default map options: ms_mode=crc,rxbounce
diff --git a/qa/suites/krbd/singleton/ms_mode$/crc.yaml b/qa/suites/krbd/singleton/ms_mode$/crc.yaml
new file mode 100644 (file)
index 0000000..3b07257
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      client:
+        rbd default map options: ms_mode=crc
diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml
new file mode 100644 (file)
index 0000000..244e45c
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      client:
+        rbd default map options: ms_mode=legacy,rxbounce
diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy.yaml
new file mode 100644 (file)
index 0000000..0048dcb
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      client:
+        rbd default map options: ms_mode=legacy
diff --git a/qa/suites/krbd/singleton/ms_mode$/secure.yaml b/qa/suites/krbd/singleton/ms_mode$/secure.yaml
new file mode 100644 (file)
index 0000000..a735db1
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      client:
+        rbd default map options: ms_mode=secure
diff --git a/qa/suites/krbd/singleton/tasks/.qa b/qa/suites/krbd/singleton/tasks/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml b/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml
new file mode 100644 (file)
index 0000000..5e30ef2
--- /dev/null
@@ -0,0 +1,19 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        osd pool default size: 1
+      osd:
+        osd shutdown pgref assert: true
+roles:
+- [mon.a, mgr.x, osd.0, client.0]
+
+tasks:
+- install:
+    extra_system_packages:
+      - fio
+- ceph:
+- workunit:
+    clients:
+      all:
+        - rbd/krbd_watch_errors.sh
diff --git a/qa/workunits/rbd/krbd_watch_errors.sh b/qa/workunits/rbd/krbd_watch_errors.sh
new file mode 100755 (executable)
index 0000000..f650d2a
--- /dev/null
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+set -ex
+set -o pipefail
+
+function refresh_loop() {
+    local dev_id="$1"
+
+    set +x
+
+    local i
+    for ((i = 1; ; i++)); do
+        echo 1 | sudo tee "${SYSFS_DIR}/${dev_id}/refresh" > /dev/null
+        if ((i % 100 == 0)); then
+            echo "Refreshed ${i} times"
+        fi
+    done
+}
+
+# sysfs directory with one subdirectory per mapped rbd device;
+# refresh_loop() writes to "<id>/refresh" underneath it
+readonly SYSFS_DIR="/sys/bus/rbd/devices"
+readonly IMAGE_NAME="watch-errors-test"
+
+rbd create -s 1G --image-feature exclusive-lock "${IMAGE_NAME}"
+
+# induce a watch error every 30 seconds
+dev="$(sudo rbd device map -o osdkeepalive=60 "${IMAGE_NAME}")"
+# strip the "/dev/rbd" prefix, leaving the id used under SYSFS_DIR
+dev_id="${dev#/dev/rbd}"
+
+# constantly refresh, not just on watch errors
+refresh_loop "${dev_id}" &
+refresh_pid=$!
+
+# start from an empty kernel log so that only this run's messages
+# are counted below
+sudo dmesg -C
+
+# test that none of the above triggers a deadlock with a workload
+fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \
+    --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \
+    --time_based --runtime=1h --eta=never
+
+# grep -c prints "0" but exits with status 1 when nothing matches; with
+# "set -e" and pipefail that would abort right here, before the refresh
+# loop is stopped, the device is unmapped and the reason is reported.
+# Tolerate only that status so that a failing dmesg or a real grep
+# error still fails the test.  The log is read with sudo, same as it
+# was cleared, because unprivileged access to it may be restricted.
+num_errors="$(sudo dmesg |
+    { grep -c "rbd${dev_id}: encountered watch error" || [[ $? -eq 1 ]]; })"
+echo "Recorded ${num_errors} watch errors"
+
+# stop the background refresher and reap it before unmapping
+kill "${refresh_pid}"
+wait
+
+sudo rbd device unmap "${dev}"
+
+# at roughly one watch error per 30 seconds, the one hour fio run should
+# produce about 120 of them; require at least half of that
+if ((num_errors < 60)); then
+    echo "Too few watch errors"
+    exit 1
+fi
+
+echo OK