From: Sage Weil Date: Mon, 10 Feb 2020 16:05:29 +0000 (-0600) Subject: cephadm: do not use special unit, naming for crash agent X-Git-Tag: v15.1.1~467^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=724199dd873e2a02f67ea2e191093d8080c686a7;p=ceph-ci.git cephadm: do not use special unit, naming for crash agent Use the normal unit file, with crash.$hostname as the name. This makes the crash agent behave like all of the other services instead of being special. The exception is that the crash agent is still implicitly deployed alongside other containers. Signed-off-by: Sage Weil --- diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index e9bdff7e587..c3399a57533 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -877,7 +877,7 @@ def get_daemon_args(fsid, daemon_type, daemon_id): # type: (str, str, Union[int, str]) -> List[str] r = list() # type: List[str] - if daemon_type in Ceph.daemons: + if daemon_type in Ceph.daemons and daemon_type != 'crash': r += [ '--setuser', 'ceph', '--setgroup', 'ceph', @@ -1056,12 +1056,13 @@ def get_container_mounts(fsid, daemon_type, daemon_id, cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id) else: cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id) - mounts[data_dir] = cdata_dir + ':z' + if daemon_type != 'crash': + mounts[data_dir] = cdata_dir + ':z' if not no_config: mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' - if daemon_type == 'rbd-mirror': - # rbd-mirror does not search for its keyring in a data directory - mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.rbd-mirror.%s.keyring' % daemon_id + if daemon_type == 'rbd-mirror' or daemon_type == 'crash': + # these do not search for their keyrings in a data directory + mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id) if daemon_type in ['mon', 'osd']: mounts['/dev'] = '/dev' # FIXME: narrow this down? @@ -1099,6 +1100,9 @@ def get_container(fsid, daemon_type, daemon_id, privileged=False, elif daemon_type == 'rbd-mirror': entrypoint = '/usr/bin/rbd-mirror' name = 'client.rbd-mirror.%s' % daemon_id + elif daemon_type == 'crash': + entrypoint = '/usr/bin/ceph-crash' + name = 'client.crash.%s' % daemon_id elif daemon_type in ['mon', 'mgr', 'mds', 'osd']: entrypoint = '/usr/bin/ceph-' + daemon_type name = '%s.%s' % (daemon_type, daemon_id) @@ -1106,10 +1110,13 @@ def get_container(fsid, daemon_type, daemon_id, privileged=False, entrypoint = '' name = '' - ceph_args = ['-n', name, '-f'] - if daemon_type in Monitoring.components: ceph_args = [] + elif daemon_type == 'crash': + ceph_args = ['-n', name] + else: + ceph_args = ['-n', name, '-f'] + return CephContainer( image=args.image, @@ -1380,59 +1387,9 @@ def deploy_crash(fsid, uid, gid, config, keyring): # type: (str, int, int, str, str) -> None crash_dir = os.path.join(args.data_dir, fsid, 'crash') makedirs(crash_dir, uid, gid, DATA_DIR_MODE) - - with open(os.path.join(crash_dir, 'keyring'), 'w') as f: - os.fchmod(f.fileno(), 0o600) - os.fchown(f.fileno(), uid, gid) - f.write(keyring) - with open(os.path.join(crash_dir, 'config'), 'w') as f: - os.fchmod(f.fileno(), 0o600) - os.fchown(f.fileno(), uid, gid) - f.write(config) - - # ceph-crash unit - mounts = { - crash_dir: '/var/lib/ceph/crash:z', - os.path.join(crash_dir, 'config'): '/etc/ceph/ceph.conf:z', - os.path.join(crash_dir, 'keyring'): '/etc/ceph/ceph.keyring:z', - } - c = CephContainer( - image=args.image, - entrypoint='/usr/bin/ceph-crash', - args=['-n', 'client.crash.%s' % get_hostname()], - volume_mounts=mounts, - cname='ceph-%s-crash' % (fsid), - ) - unit_name = 'ceph-%s-crash.service' % fsid - with open(os.path.join(args.unit_dir, unit_name + '.new'), 'w') as f: - f.write("""# generated by cephadm -[Unit] -Description=Ceph crash collector for {fsid} -PartOf=ceph-{fsid}.target -Before=ceph-{fsid}.target - -[Service] -Type=simple -ExecStartPre=-{container_path} rm ceph-{fsid}-crash -ExecStart={cmd} -ExecStop=-{container_path} stop ceph-{fsid}-crash -KillMode=none -Restart=always -RestartSec=10 -StartLimitInterval=10min -StartLimitBurst=10 - -[Install] -WantedBy=ceph-{fsid}.target -""".format( - container_path=container_path, - fsid=fsid, - cmd=' '.join(c.run_cmd())) - ) - os.rename(os.path.join(args.unit_dir, unit_name + '.new'), - os.path.join(args.unit_dir, unit_name)) - subprocess.check_output(['systemctl', 'enable', unit_name]) - subprocess.check_output(['systemctl', 'start', unit_name]) + c = get_container(args.fsid, 'crash', get_hostname()) + deploy_daemon(args.fsid, 'crash', get_hostname(), c, uid, gid, + config, keyring) def get_unit_file(fsid, uid, gid): # type: (str, int, int) -> str @@ -2240,10 +2197,7 @@ def list_daemons(detail=True, legacy_dir=None): elif is_fsid(i): fsid = i for j in os.listdir(os.path.join(data_dir, i)): - if j == 'crash': - name = 'crash' - unit_name = 'ceph-%s-crash.service' % fsid - elif '.' in j: + if '.' in j: name = j (daemon_type, daemon_id) = j.split('.', 1) unit_name = get_unit_name(fsid, @@ -2483,8 +2437,7 @@ def command_rm_cluster(): verbose_on_failure=False) # cluster units - for unit_name in ['ceph-%s.target' % args.fsid, - 'ceph-%s-crash.service' % args.fsid]: + for unit_name in ['ceph-%s.target' % args.fsid]: call(['systemctl', 'stop', unit_name], verbose_on_failure=False) call(['systemctl', 'reset-failed', unit_name], @@ -2500,8 +2453,6 @@ def command_rm_cluster(): # rm units call_throws(['rm', '-f', args.unit_dir + '/ceph-%s@.service' % args.fsid]) - call_throws(['rm', '-f', args.unit_dir + - '/ceph-%s-crash.service' % args.fsid]) call_throws(['rm', '-f', args.unit_dir + '/ceph-%s.target' % args.fsid]) call_throws(['rm', '-rf',