From 3a1ac3ff6ac2ef6e3e8c361c0c55562b732b9aac Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 4 Nov 2019 09:34:28 -0600
Subject: [PATCH] ceph-daemon: make rm-cluster handle failed unit cleanup

Split the daemon enumeration out of command_ls() into list_daemons()
so that rm-cluster can reuse it.  command_ls() now just prints the
JSON dump of what list_daemons() returns.

rm-cluster walks list_daemons() and, for every entry whose fsid matches
args.fsid and whose style is 'ceph-daemon:v1', runs systemctl stop,
reset-failed and disable on the daemon's unit before handling the
cluster-wide units.

get_unit_name() takes daemon_id as an optional argument so callers can
pass a daemon name in place of a separate type and id.

Signed-off-by: Sage Weil
---
Review note: this differs from the original revision of the patch in
one place.  'host_version = None' used to be left behind in
command_ls(), whose new body never reads it; it now stays with the
code that moved into list_daemons().  The hunk counts and the diffstat
below reflect that change.  The blob hashes on the index line were not
regenerated, and the indentation of context lines was reconstructed
from a whitespace-collapsed copy, so verify against the target tree
before applying.

 src/ceph-daemon | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/ceph-daemon b/src/ceph-daemon
index f2d4b417013..62f678e3ca3 100755
--- a/src/ceph-daemon
+++ b/src/ceph-daemon
@@ -228,8 +228,12 @@ def find_program(filename):
         raise ValueError('%s not found' % filename)
     return name
 
-def get_unit_name(fsid, daemon_type, daemon_id):
-    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+def get_unit_name(fsid, daemon_type, daemon_id=None):
+    # accept either name or type + id
+    if daemon_id is not None:
+        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+    else:
+        return 'ceph-%s@%s' % (fsid, daemon_type)
 
 def check_unit(unit_name):
     # NOTE: we ignore the exit code here because systemctl outputs
@@ -1261,6 +1265,10 @@ def command_unit():
 ##################################
 
 def command_ls():
+    ls = list_daemons()
+    print(json.dumps(ls, indent=4))
+
+def list_daemons():
     ls = []
     host_version = None
 
@@ -1334,8 +1342,8 @@ def command_ls():
 
     # /var/lib/rook
     # WRITE ME
+    return ls
 
-    print(json.dumps(ls, indent=4))
 
 ##################################
 
@@ -1415,7 +1423,21 @@ def command_rm_cluster():
         raise RuntimeError('must pass --force to proceed: '
                            'this command may destroy precious data!')
 
-    # ignore errors here
+    # stop + disable individual daemon units
+    for d in list_daemons():
+        if d['fsid'] != args.fsid:
+            continue
+        if d['style'] != 'ceph-daemon:v1':
+            continue
+        unit_name = get_unit_name(args.fsid, d['name'])
+        call(['systemctl', 'stop', unit_name],
+             verbose_on_failure=False)
+        call(['systemctl', 'reset-failed', unit_name],
+             verbose_on_failure=False)
+        call(['systemctl', 'disable', unit_name],
+             verbose_on_failure=False)
+
+    # cluster units
     for unit_name in ['ceph-%s.target' % args.fsid,
                       'ceph-%s-crash.service' % args.fsid]:
         call(['systemctl', 'stop', unit_name],
@@ -1430,8 +1452,6 @@ def command_rm_cluster():
     call(['systemctl', 'stop', slice_name],
          verbose_on_failure=False)
 
-    # FIXME: stop + disable individual daemon units, too?
-
     # rm units
     call_throws(['rm', '-f', args.unit_dir +
                  '/ceph-%s@.service' % args.fsid])
-- 
2.39.5