git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-daemon: make rm-cluster handle failed unit cleanup
author Sage Weil <sage@redhat.com>
Mon, 4 Nov 2019 15:34:28 +0000 (09:34 -0600)
committer Sage Weil <sage@redhat.com>
Mon, 4 Nov 2019 15:36:02 +0000 (09:36 -0600)
Signed-off-by: Sage Weil <sage@redhat.com>
src/ceph-daemon

index f2d4b4170135a9c86a9adcdc980606b65efee263..62f678e3ca3fa3acc2eb2339428bb6492186627d 100755 (executable)
@@ -228,8 +228,12 @@ def find_program(filename):
         raise ValueError('%s not found' % filename)
     return name
 
-def get_unit_name(fsid, daemon_type, daemon_id):
-    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+def get_unit_name(fsid, daemon_type, daemon_id=None):
+    # accept either name or type + id
+    if daemon_id is not None:
+        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
+    else:
+        return 'ceph-%s@%s' % (fsid, daemon_type)
 
 def check_unit(unit_name):
     # NOTE: we ignore the exit code here because systemctl outputs
@@ -1261,8 +1265,12 @@ def command_unit():
 ##################################
 
 def command_ls():
-    ls = []
     host_version = None
+    ls = list_daemons()
+    print(json.dumps(ls, indent=4))
+
+def list_daemons():
+    ls = []
 
     # /var/lib/ceph
     if os.path.exists(args.data_dir):
@@ -1334,8 +1342,8 @@ def command_ls():
 
     # /var/lib/rook
     # WRITE ME
+    return ls
 
-    print(json.dumps(ls, indent=4))
 
 ##################################
 
@@ -1415,7 +1423,21 @@ def command_rm_cluster():
         raise RuntimeError('must pass --force to proceed: '
                            'this command may destroy precious data!')
 
-    # ignore errors here
+    # stop + disable individual daemon units
+    for d in list_daemons():
+        if d['fsid'] != args.fsid:
+            continue
+        if d['style'] != 'ceph-daemon:v1':
+            continue
+        unit_name = get_unit_name(args.fsid, d['name'])
+        call(['systemctl', 'stop', unit_name],
+             verbose_on_failure=False)
+        call(['systemctl', 'reset-failed', unit_name],
+             verbose_on_failure=False)
+        call(['systemctl', 'disable', unit_name],
+             verbose_on_failure=False)
+
+    # cluster units
     for unit_name in ['ceph-%s.target' % args.fsid,
                       'ceph-%s-crash.service' % args.fsid]:
         call(['systemctl', 'stop', unit_name],
@@ -1430,8 +1452,6 @@ def command_rm_cluster():
     call(['systemctl', 'stop', slice_name],
          verbose_on_failure=False)
 
-    # FIXME: stop + disable individual daemon units, too?
-
     # rm units
     call_throws(['rm', '-f', args.unit_dir +
                              '/ceph-%s@.service' % args.fsid])