From: Kotresh HR Date: Thu, 11 Mar 2021 16:44:55 +0000 (+0530) Subject: test: Add test for mgr hang when osd is full X-Git-Tag: v16.2.5~69^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F41691%2Fhead;p=ceph.git test: Add test for mgr hang when osd is full Add fs suite for tests requiring one node as well. Fixes: https://tracker.ceph.com/issues/50532 Signed-off-by: Patrick Donnelly Signed-off-by: Kotresh HR (cherry picked from commit 2bd6ba8026d9374e990abbb2cddd39a87290f261) --- diff --git a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml new file mode 100644 index 00000000000..865b976c699 --- /dev/null +++ b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 5 # GB +- machine: + disk: 10 # GB diff --git a/qa/suites/fs/full/% b/qa/suites/fs/full/% new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/full/.qa b/qa/suites/fs/full/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/full/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/full/begin.yaml b/qa/suites/fs/full/begin.yaml new file mode 120000 index 00000000000..311d404f7c2 --- /dev/null +++ b/qa/suites/fs/full/begin.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/clusters/.qa b/qa/suites/fs/full/clusters/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/full/clusters/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml new file mode 120000 index 00000000000..517b76547e9 --- /dev/null +++ b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1 @@ 
+.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/conf b/qa/suites/fs/full/conf new file mode 120000 index 00000000000..16e8cc44b7d --- /dev/null +++ b/qa/suites/fs/full/conf @@ -0,0 +1 @@ +.qa/cephfs/conf \ No newline at end of file diff --git a/qa/suites/fs/full/distro b/qa/suites/fs/full/distro new file mode 120000 index 00000000000..0862b4457b3 --- /dev/null +++ b/qa/suites/fs/full/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$ \ No newline at end of file diff --git a/qa/suites/fs/full/mount/fuse.yaml b/qa/suites/fs/full/mount/fuse.yaml new file mode 100644 index 00000000000..8338cc4933e --- /dev/null +++ b/qa/suites/fs/full/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: diff --git a/qa/suites/fs/full/objectstore/.qa b/qa/suites/fs/full/objectstore/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/full/objectstore/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml new file mode 120000 index 00000000000..a59cf517506 --- /dev/null +++ b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/overrides.yaml b/qa/suites/fs/full/overrides.yaml new file mode 100644 index 00000000000..921528d66a5 --- /dev/null +++ b/qa/suites/fs/full/overrides.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + mgr: + debug client: 20 + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_NEARFULL + - POOL_BACKFILLFULL + - PG_DEGRADED + - OSD_OUT_OF_ORDER_FULL + - OSD_NEARFULL + - OSD_FULL diff --git a/qa/suites/fs/full/overrides/+ b/qa/suites/fs/full/overrides/+ new file mode 100644 
index 00000000000..e69de29bb2d diff --git a/qa/suites/fs/full/overrides/.qa b/qa/suites/fs/full/overrides/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/full/overrides/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/full/overrides/frag_enable.yaml b/qa/suites/fs/full/overrides/frag_enable.yaml new file mode 120000 index 00000000000..34a39a368cf --- /dev/null +++ b/qa/suites/fs/full/overrides/frag_enable.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag_enable.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/overrides/no_client_pidfile.yaml b/qa/suites/fs/full/overrides/no_client_pidfile.yaml new file mode 120000 index 00000000000..8888f33274b --- /dev/null +++ b/qa/suites/fs/full/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/overrides/whitelist_health.yaml b/qa/suites/fs/full/overrides/whitelist_health.yaml new file mode 120000 index 00000000000..74f39a49b27 --- /dev/null +++ b/qa/suites/fs/full/overrides/whitelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_health.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..b4528c0f8c0 --- /dev/null +++ b/qa/suites/fs/full/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/full/tasks/.qa b/qa/suites/fs/full/tasks/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/full/tasks/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml new file mode 100644 index 00000000000..88d6527bf82 --- /dev/null +++ 
b/qa/suites/fs/full/tasks/mgr-osd-full.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 1 + osd_pool_default_min_size: 1 + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 + osd: # force bluestore since it's required for ec overwrites + osd objectstore: bluestore + bluestore block size: 1073741824 +tasks: +- workunit: + cleanup: false + clients: + client.0: + - fs/full/subvolume_rm.sh diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py index 491f683082e..371d2a2dddb 100644 --- a/qa/tasks/workunit.py +++ b/qa/tasks/workunit.py @@ -400,6 +400,7 @@ def _run_tests(ctx, refspec, role, tests, env, basedir, run.Raw('PATH=$PATH:/usr/sbin'), run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)), run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)), + run.Raw('CEPH_MNT={dir}'.format(dir=mnt)), ] if env is not None: for var, val in env.items(): diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh new file mode 100755 index 00000000000..d0f9e240347 --- /dev/null +++ b/qa/workunits/fs/full/subvolume_rm.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command +# when the osd is full. The command used to hang. The osd is of the size 1GB. +# The subvolume is created and 500MB file is written. The full-ratios are +# set below 500MB such that the osd is treated as full. Now the subvolume is +# removed. This should be successful with the introduction of FULL +# capabilities which the mgr holds.
+
+set -e
+
+# Invert a command's exit status: return 0 only when the command fails.
+# Used to assert that an operation is rejected without tripping 'set -e'.
+expect_failure() {
+  if "$@"; then return 1; else return 0; fi
+}
+
+# CEPH_MNT is expected to be provided by the test harness. Without it the
+# paths built below would resolve against the local root filesystem, so
+# abort early with a clear message instead.
+: "${CEPH_MNT:?CEPH_MNT must be set to the CephFS mount point}"
+
+ceph fs subvolume create cephfs sub_0
+subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null)
+
+#For debugging
+echo "Before write"
+df -h
+ceph osd df
+
+# Fill the subvolume with 500MB of data so the lowered ratios below are exceeded.
+sudo dd if=/dev/urandom of="$CEPH_MNT$subvol_path/500MB_file-1" status=progress bs=1M count=500
+
+ceph osd set-full-ratio 0.2
+ceph osd set-nearfull-ratio 0.16
+ceph osd set-backfillfull-ratio 0.18
+
+# Poll (up to 30 tries, 1s apart) until the cluster reports OSD_FULL.
+# The counter uses $(( )) rather than 'let': 'let' returns a non-zero status
+# when its result is 0, which under 'set -e' killed the script silently on
+# the last iteration. The timeout is now reported explicitly below.
+timeout=30
+while [ "$timeout" -gt 0 ]
+do
+  health=$(ceph health detail)
+  [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+  echo "Wating for osd to be full: $timeout"
+  sleep 1
+  timeout=$((timeout - 1))
+done
+if [ "$timeout" -le 0 ]; then
+  echo "Timed out waiting for the osd to become full" >&2
+  exit 1
+fi
+
+#For debugging
+echo "After ratio set"
+df -h
+ceph osd df
+
+#Delete subvolume
+ceph fs subvolume rm cephfs sub_0
+
+#Validate subvolume is deleted
+expect_failure ceph fs subvolume info cephfs sub_0
+
+#Wait for subvolume to delete data
+# Same polling scheme as above: the trash directory must drain within 30
+# tries, otherwise the test fails with an explicit message.
+trashdir=$CEPH_MNT/volumes/_deleting
+timeout=30
+while [ "$timeout" -gt 0 ]
+do
+  [ -z "$(sudo ls -A "$trashdir")" ] && echo "Trash directory $trashdir is empty" && break
+  echo "Wating for trash dir to be empty: $timeout"
+  sleep 1
+  timeout=$((timeout - 1))
+done
+if [ "$timeout" -le 0 ]; then
+  echo "Timed out waiting for trash directory $trashdir to be empty" >&2
+  exit 1
+fi
+
+echo OK