test: Add test for mgr hang when osd is full

author Kotresh HR <khiremat@redhat.com>

Thu, 11 Mar 2021 16:44:55 +0000 (22:14 +0530)

committer Kotresh HR <khiremat@redhat.com>

Sat, 22 May 2021 13:39:07 +0000 (19:09 +0530)
author Kotresh HR <khiremat@redhat.com>
Thu, 11 Mar 2021 16:44:55 +0000 (22:14 +0530)
committer Kotresh HR <khiremat@redhat.com>
Sat, 22 May 2021 13:39:07 +0000 (19:09 +0530)
diff --git a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml

new file mode 100644 (file)

index 0000000..865b976
--- /dev/null
+++ b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
@@ -0,0 +1,8 @@
+roles:
+- [mon.a, mgr.x, mds.a, osd.0, client.0]
+openstack:
+- volumes: # attached to each instance
+    count: 1
+    size: 5 # GB
+- machine:
+    disk: 10 # GB
diff --git a/qa/suites/fs/full/% b/qa/suites/fs/full/%

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/qa/suites/fs/full/.qa b/qa/suites/fs/full/.qa

new file mode 120000 (symlink)

index 0000000..a602a03
--- /dev/null
+++ b/qa/suites/fs/full/.qa
@@ -0,0 +1 @@
+../.qa/
+\ No newline at end of file
diff --git a/qa/suites/fs/full/begin.yaml b/qa/suites/fs/full/begin.yaml

new file mode 120000 (symlink)

index 0000000..311d404
--- /dev/null
+++ b/qa/suites/fs/full/begin.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/clusters/.qa b/qa/suites/fs/full/clusters/.qa

new file mode 120000 (symlink)

index 0000000..a602a03
--- /dev/null
+++ b/qa/suites/fs/full/clusters/.qa
@@ -0,0 +1 @@
+../.qa/
+\ No newline at end of file
diff --git a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml

new file mode 120000 (symlink)

index 0000000..517b765
--- /dev/null
+++ b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml
@@ -0,0 +1 @@
+.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/conf b/qa/suites/fs/full/conf

new file mode 120000 (symlink)

index 0000000..16e8cc4
--- /dev/null
+++ b/qa/suites/fs/full/conf
@@ -0,0 +1 @@
+.qa/cephfs/conf
+\ No newline at end of file
diff --git a/qa/suites/fs/full/distro b/qa/suites/fs/full/distro

new file mode 120000 (symlink)

index 0000000..0862b44
--- /dev/null
+++ b/qa/suites/fs/full/distro
@@ -0,0 +1 @@
+.qa/distros/supported-random-distro$
+\ No newline at end of file
diff --git a/qa/suites/fs/full/mount/fuse.yaml b/qa/suites/fs/full/mount/fuse.yaml

new file mode 100644 (file)

index 0000000..8338cc4
--- /dev/null
+++ b/qa/suites/fs/full/mount/fuse.yaml
@@ -0,0 +1,2 @@
+tasks:
+  - ceph-fuse:
diff --git a/qa/suites/fs/full/objectstore/.qa b/qa/suites/fs/full/objectstore/.qa

new file mode 120000 (symlink)

index 0000000..a602a03
--- /dev/null
+++ b/qa/suites/fs/full/objectstore/.qa
@@ -0,0 +1 @@
+../.qa/
+\ No newline at end of file
diff --git a/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml

new file mode 120000 (symlink)

index 0000000..a59cf51
--- /dev/null
+++ b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml
@@ -0,0 +1 @@
+.qa/objectstore/bluestore-bitmap.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides.yaml b/qa/suites/fs/full/overrides.yaml

new file mode 100644 (file)

index 0000000..921528d
--- /dev/null
+++ b/qa/suites/fs/full/overrides.yaml
@@ -0,0 +1,19 @@
+overrides:
+  ceph:
+    conf:
+      mgr:
+        debug client: 20
+    log-ignorelist:
+      - OSD full dropping all updates
+      - OSD near full
+      - pausewr flag
+      - failsafe engaged, dropping updates
+      - failsafe disengaged, no longer dropping
+      - is full \(reached quota
+      - POOL_FULL
+      - POOL_NEARFULL
+      - POOL_BACKFILLFULL
+      - PG_DEGRADED
+      - OSD_OUT_OF_ORDER_FULL
+      - OSD_NEARFULL
+      - OSD_FULL
diff --git a/qa/suites/fs/full/overrides/+ b/qa/suites/fs/full/overrides/+

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/qa/suites/fs/full/overrides/.qa b/qa/suites/fs/full/overrides/.qa

new file mode 120000 (symlink)

index 0000000..a602a03
--- /dev/null
+++ b/qa/suites/fs/full/overrides/.qa
@@ -0,0 +1 @@
+../.qa/
+\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides/frag_enable.yaml b/qa/suites/fs/full/overrides/frag_enable.yaml

new file mode 120000 (symlink)

index 0000000..34a39a3
--- /dev/null
+++ b/qa/suites/fs/full/overrides/frag_enable.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/frag_enable.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides/no_client_pidfile.yaml b/qa/suites/fs/full/overrides/no_client_pidfile.yaml

new file mode 120000 (symlink)

index 0000000..8888f33
--- /dev/null
+++ b/qa/suites/fs/full/overrides/no_client_pidfile.yaml
@@ -0,0 +1 @@
+.qa/overrides/no_client_pidfile.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides/whitelist_health.yaml b/qa/suites/fs/full/overrides/whitelist_health.yaml

new file mode 120000 (symlink)

index 0000000..74f39a4
--- /dev/null
+++ b/qa/suites/fs/full/overrides/whitelist_health.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/whitelist_health.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml

new file mode 120000 (symlink)

index 0000000..b4528c0
--- /dev/null
+++ b/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml
@@ -0,0 +1 @@
+.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
+\ No newline at end of file
diff --git a/qa/suites/fs/full/tasks/.qa b/qa/suites/fs/full/tasks/.qa

new file mode 120000 (symlink)

index 0000000..a602a03
--- /dev/null
+++ b/qa/suites/fs/full/tasks/.qa
@@ -0,0 +1 @@
+../.qa/
+\ No newline at end of file
diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml

new file mode 100644 (file)

index 0000000..88d6527
--- /dev/null
+++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml
@@ -0,0 +1,21 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        osd_pool_default_size: 1
+        osd_pool_default_min_size: 1
+      client:
+        debug ms: 1
+        debug client: 20
+      mds:
+        debug ms: 1
+        debug mds: 20
+      osd: # force bluestore since it's required for ec overwrites
+        osd objectstore: bluestore
+        bluestore block size: 1073741824 # 1 GiB (2^30 bytes); fs/full/subvolume_rm.sh assumes a 1GB osd
+tasks:
+- workunit:
+    cleanup: false
+    clients:
+      client.0:
+        - fs/full/subvolume_rm.sh
diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py

index 491f683082e008724008ad2bbdf080a9887c594f..371d2a2dddb5a19d53b12b6ee73c02ff1f188005 100644 (file)
--- a/qa/tasks/workunit.py
+++ b/qa/tasks/workunit.py
@@ -400,6 +400,7 @@ def _run_tests(ctx, refspec, role, tests, env, basedir,
                      run.Raw('PATH=$PATH:/usr/sbin'),
                      run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                      run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
+                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                  ]
                  if env is not None:
                      for var, val in env.items():
diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh

new file mode 100755 (executable)

index 0000000..d0f9e24
--- /dev/null
+++ b/qa/workunits/fs/full/subvolume_rm.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command
+# when the osd is full. The command used to hang. The osd is of the size 1GB.
+# The subvolume is created and 500MB file is written. The full-ratios are
+# set below 500MB such that the osd is treated as full. Now the subvolume is
+# is removed. This should be successful with the introduction of FULL
+# capabilities which the mgr holds.
+
+set -e
+expect_failure() {
+       if "$@"; then return 1; else return 0; fi
+}
+
+ceph fs subvolume create cephfs sub_0
+subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)
+
+#For debugging
+echo "Before write"
+df -h
+ceph osd df
+
+sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500
+
+ceph osd set-full-ratio 0.2
+ceph osd set-nearfull-ratio 0.16
+ceph osd set-backfillfull-ratio 0.18
+
+timeout=30
+while [ $timeout -gt 0 ]
+do
+  health=$(ceph health detail)
+  [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+  echo "Wating for osd to be full: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+#For debugging
+echo "After ratio set"
+df -h
+ceph osd df
+
+#Delete subvolume
+ceph fs subvolume rm cephfs sub_0
+
+#Validate subvolume is deleted
+expect_failure ceph fs subvolume info cephfs sub_0
+
+#Wait for subvolume to delete data
+trashdir=$CEPH_MNT/volumes/_deleting
+timeout=30
+while [ $timeout -gt 0 ]
+do
+  [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" &&  break
+  echo "Wating for trash dir to be empty: $timeout"
+  sleep 1
+  let "timeout-=1"
+done
+
+echo OK
author	Kotresh HR <khiremat@redhat.com>
	Thu, 11 Mar 2021 16:44:55 +0000 (22:14 +0530)
committer	Kotresh HR <khiremat@redhat.com>
	Sat, 22 May 2021 13:39:07 +0000 (19:09 +0530)
qa/cephfs/clusters/1-node-1-mds-1-osd.yaml	[new file with mode: 0644]	patch \| blob
qa/suites/fs/full/%	[new file with mode: 0644]	patch \| blob
qa/suites/fs/full/.qa	[new symlink]	patch \| blob
qa/suites/fs/full/begin.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/clusters/.qa	[new symlink]	patch \| blob
qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/conf	[new symlink]	patch \| blob
qa/suites/fs/full/distro	[new symlink]	patch \| blob
qa/suites/fs/full/mount/fuse.yaml	[new file with mode: 0644]	patch \| blob
qa/suites/fs/full/objectstore/.qa	[new symlink]	patch \| blob
qa/suites/fs/full/objectstore/bluestore-bitmap.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/overrides.yaml	[new file with mode: 0644]	patch \| blob
qa/suites/fs/full/overrides/+	[new file with mode: 0644]	patch \| blob
qa/suites/fs/full/overrides/.qa	[new symlink]	patch \| blob
qa/suites/fs/full/overrides/frag_enable.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/overrides/no_client_pidfile.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/overrides/whitelist_health.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml	[new symlink]	patch \| blob
qa/suites/fs/full/tasks/.qa	[new symlink]	patch \| blob
qa/suites/fs/full/tasks/mgr-osd-full.yaml	[new file with mode: 0644]	patch \| blob
qa/tasks/workunit.py		patch \| blob \| history
qa/workunits/fs/full/subvolume_rm.sh	[new file with mode: 0755]	patch \| blob