From: Redouane Kachach Date: Mon, 23 Mar 2026 11:49:20 +0000 (+0100) Subject: qa/cephadm: zap raw devices before OSD deployment X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f3c938f5032885434dd2ab77f9b2e6bc05917f3c;p=ceph.git qa/cephadm: zap raw devices before OSD deployment ceph_osds() assumes all teuthology scratch devices are clean and directly usable by: > ceph orch apply osd --all-available-devices --method raw However, in practice some devices may retain stale BlueStore metadata (or other data) from previous runs. cephadm correctly skips such devices, leading to fewer OSDs than expected and causing the test to time out waiting for an exact OSD count. This change adds a pre-deployment cleanup step for `raw-osds` that: - zaps any existing BlueStore metadata (`ceph-bluestore-tool zap-device`) - removes filesystem signatures (`wipefs --all`) - zeroes the first 10 MiB of each device (`dd`) This ensures all scratch devices are truly available for OSD deployment and prevents mismatches between expected and actual OSD counts. Fixes: https://tracker.ceph.com/issues/75218 Signed-off-by: Redouane Kachach --- diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index d4e1b78ab455..2ce26389a22a 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -1139,6 +1139,23 @@ def ceph_osds(ctx, config): cur += 1 if cur == 0: + if raw: + for remote, devs in devs_by_remote.items(): + for dev in devs: + log.info(f'Zapping device {dev} on {remote.shortname} before raw OSD deployment') + remote.run( + args=[ + 'sudo', 'ceph-bluestore-tool', 'zap-device', + '--dev', dev, + '--yes-i-really-really-mean-it', + ], + check_status=False, + ) + remote.run(args=['sudo', 'wipefs', '--all', dev], check_status=False) + remote.run( + args=['sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10', 'conv=fsync'], + check_status=False, + ) osd_cmd = ['ceph', 'orch', 'apply', 'osd', '--all-available-devices'] if raw: osd_cmd.extend(['--method', 'raw'])