From 1a3cbacdb242b37bd8b532b990408cfd3321d67f Mon Sep 17 00:00:00 2001
From: Kotresh HR
Date: Thu, 24 Jul 2025 17:31:12 +0000
Subject: [PATCH] qa: Add test for subvolume_ls on osd full

Fixes: https://tracker.ceph.com/issues/72260
Signed-off-by: Kotresh HR
(cherry picked from commit 8547e57ebc4022ca6750149f49b68599a8af712e)
---
 ...mds-1-osd.yaml => 1-node-4-mds-1-osd.yaml} |   2 +-
 .../fs/full/clusters/1-node-1-mds-1-osd.yaml  |   1 -
 .../fs/full/clusters/1-node-4-mds-1-osd.yaml  |   1 +
 qa/suites/fs/full/overrides.yaml              |   1 +
 qa/suites/fs/full/tasks/mgr-osd-full.yaml     |   5 +
 qa/workunits/fs/full/subvolume_ls.sh          | 119 ++++++++++++++++++
 6 files changed, 127 insertions(+), 2 deletions(-)
 rename qa/cephfs/clusters/{1-node-1-mds-1-osd.yaml => 1-node-4-mds-1-osd.yaml} (65%)
 delete mode 120000 qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml
 create mode 120000 qa/suites/fs/full/clusters/1-node-4-mds-1-osd.yaml
 create mode 100755 qa/workunits/fs/full/subvolume_ls.sh

diff --git a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml b/qa/cephfs/clusters/1-node-4-mds-1-osd.yaml
similarity index 65%
rename from qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
rename to qa/cephfs/clusters/1-node-4-mds-1-osd.yaml
index 865b976c699..dc3e10681e7 100644
--- a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
+++ b/qa/cephfs/clusters/1-node-4-mds-1-osd.yaml
@@ -1,5 +1,5 @@
 roles:
-- [mon.a, mgr.x, mds.a, osd.0, client.0]
+- [mon.a, mgr.x, mds.a, mds.b, mds.c, mds.d, osd.0, client.0]
 openstack:
 - volumes: # attached to each instance
     count: 1
diff --git a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml
deleted file mode 120000
index 517b76547e9..00000000000
--- a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/full/clusters/1-node-4-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-4-mds-1-osd.yaml
new file mode 120000
index 00000000000..95633a09f9d
--- /dev/null
+++ b/qa/suites/fs/full/clusters/1-node-4-mds-1-osd.yaml
@@ -0,0 +1 @@
+.qa/cephfs/clusters/1-node-4-mds-1-osd.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/full/overrides.yaml b/qa/suites/fs/full/overrides.yaml
index 921528d66a5..afaab9b5a8d 100644
--- a/qa/suites/fs/full/overrides.yaml
+++ b/qa/suites/fs/full/overrides.yaml
@@ -17,3 +17,4 @@ overrides:
       - OSD_OUT_OF_ORDER_FULL
       - OSD_NEARFULL
       - OSD_FULL
+      - MGR_DOWN
diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml
index a005f52037c..ff5e1adc06d 100644
--- a/qa/suites/fs/full/tasks/mgr-osd-full.yaml
+++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml
@@ -29,3 +29,8 @@ tasks:
     clients:
       client.0:
         - fs/full/subvolume_snapshot_rm.sh
+- workunit:
+    cleanup: true
+    clients:
+      client.0:
+        - fs/full/subvolume_ls.sh
diff --git a/qa/workunits/fs/full/subvolume_ls.sh b/qa/workunits/fs/full/subvolume_ls.sh
new file mode 100755
index 00000000000..7975c4061e2
--- /dev/null
+++ b/qa/workunits/fs/full/subvolume_ls.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+set -ex
+
+# This testcase exercises the 'ceph fs subvolume ls' mgr command when the osd
+# is full. The command used to miss out a few subvolumes in the listing; the
+# issue shows up in a multi-mds active setup. Please see the tracker:
+# https://tracker.ceph.com/issues/72260
+
+# The suite sets the 'bluestore block size' to 2GiB, so the osd is 2GiB in
+# size. 25 subvolumes are created and a 1GB file is written at the root.
+# The full ratios are then set so that less than 500MB of data is treated as
+# osd full. The subvolumes are listed 20 times, failing over the mgr each time
+# (to invalidate the readdir cache), and the count is validated on each run.
+
+SUBVOL_CNT=25
+
+expect_failure() {
+    if "$@"; then return 1; else return 0; fi
+}
+validate_subvol_cnt() {
+    if [ $1 -eq $SUBVOL_CNT ]; then return 0; else return 1; fi
+}
+restart_mgr() {
+    ceph mgr fail x
+    timeout=30
+    while [ $timeout -gt 0 ]
+    do
+        active_mgr_cnt=$(ceph status | grep mgr | grep active | grep -v no | wc -l)
+        if [ $active_mgr_cnt -eq 1 ]; then break; fi
+        echo "Waiting for mgr to be active after failover: $timeout"
+        sleep 1
+        let "timeout-=1"
+    done
+}
+
+#Set client_use_random_mds
+ceph config set client client_use_random_mds true
+
+#Set max_mds to 3 and wait for at least two active MDS
+ceph fs set cephfs max_mds 3
+timeout=30
+while [ $timeout -gt 0 ]
+do
+    active_cnt=$(ceph fs status | grep active | wc -l)
+    if [ $active_cnt -ge 2 ]; then break; fi
+    echo "Waiting for at least 2 active MDS: $timeout"
+    sleep 1
+    let "timeout-=1"
+done
+
+#Create subvolumes
+for i in $(seq 1 $SUBVOL_CNT); do ceph fs subvolume create cephfs sub_$i; done
+
+#For debugging
+echo "Before write"
+df -h
+ceph osd df
+
+sudo dd if=/dev/urandom of=$CEPH_MNT/1GB_file-1 status=progress bs=1M count=1000
+
+# The suite (qa/suites/fs/full/tasks/mgr-osd-full.yaml) sets the 'bluestore block size'
+# to 2GiB, so the osd is 2GiB in size. The full ratios set below ensure that
+# writing less than 500MB of data takes the osd to full.
+ceph osd set-full-ratio 0.2
+ceph osd set-nearfull-ratio 0.16
+ceph osd set-backfillfull-ratio 0.18
+
+timeout=30
+while [ $timeout -gt 0 ]
+do
+    health=$(ceph health detail)
+    [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
+    echo "Waiting for osd to be full: $timeout"
+    sleep 1
+    let "timeout-=1"
+done
+
+#For debugging
+echo "After ratio set"
+df -h
+ceph osd df
+
+#Clear the readdir cache by failing over the mgr, which forces a fresh libcephfs connection
+#List and validate the subvolumes 20 times
+for i in {1..20};
+do
+    restart_mgr
+    #List the subvolumes and validate the count
+    subvol_cnt=$(ceph fs subvolume ls cephfs --format=json-pretty | grep sub_ | wc -l)
+    validate_subvol_cnt $subvol_cnt
+done
+
+#Delete all subvolumes
+for i in $(seq 1 $SUBVOL_CNT); do ceph fs subvolume rm cephfs sub_$i; done
+
+#Wait for the deleted subvolume data to be purged from the trash dir
+trashdir=$CEPH_MNT/volumes/_deleting
+timeout=30
+while [ $timeout -gt 0 ]
+do
+    [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" && break
+    echo "Waiting for trash dir to be empty: $timeout"
+    sleep 1
+    let "timeout-=1"
+done
+
+sudo rm -f $CEPH_MNT/1GB_file-1
+
+#Set the ratios back so the other full-suite tests can run
+ceph osd set-full-ratio 0.95
+ceph osd set-nearfull-ratio 0.95
+ceph osd set-backfillfull-ratio 0.95
+
+#For debugging
+echo "After test"
+df -h
+ceph osd df
+
+echo OK
-- 
2.39.5
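
As a sanity check on the numbers used above: with 'bluestore block size' at 2GiB, a full-ratio of 0.2 puts the full threshold at roughly 0.2 * 2048MiB, about 410MiB, so the 1GB file written beforehand already exceeds it and OSD_FULL is reported shortly after the ratios are lowered. For anyone who wants to exercise the workunit by hand outside teuthology, a minimal sketch follows; the mount point is an assumption (teuthology's workunit task normally exports CEPH_MNT and mounts client.0 itself), and the filesystem name 'cephfs' is the one the script already hard-codes.

    # Hypothetical manual invocation on a throwaway dev cluster.
    # /mnt/cephfs is an assumed mount of the 'cephfs' filesystem; the script
    # needs sudo on the mount and admin caps for the 'ceph osd set-*-ratio'
    # commands, and it lowers/restores the full ratios itself.
    CEPH_MNT=/mnt/cephfs bash qa/workunits/fs/full/subvolume_ls.sh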