From: Sage Weil Date: Mon, 26 Apr 2021 18:48:03 +0000 (-0400) Subject: mgr/cephadm: nfs: add rank to grace file from mgr module X-Git-Tag: v16.2.5~87^2~59 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9cb23c75b0da53b1ccc1e476d69b46f8aa9c94f7;p=ceph.git mgr/cephadm: nfs: add rank to grace file from mgr module Do the grace file manipulation from the mgr module. For add, this isn't especially important, but for remove it is very important. Clean out old ranks from the grace table before we record that the rank has been purged from the rank_map. Signed-off-by: Sage Weil (cherry picked from commit 74169045a9b212d4b549159a8f7870ed6e9119ea) --- diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index b218e6d29a2e..aac7331515b0 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -505,43 +505,6 @@ class NFSGanesha(object): os.fchown(f.fileno(), uid, gid) f.write(self.rgw.get('keyring', '')) - def get_rados_grace_container(self, action): - # type: (str) -> CephContainer - """Container for a ganesha action on the grace db""" - entrypoint = '/usr/bin/ganesha-rados-grace' - - assert self.pool - args = ['--pool', self.pool] - if self.namespace: - args += ['--ns', self.namespace] - if self.userid: - args += ['--userid', self.userid] - - meta = json.loads(self.ctx.meta_json) - if 'service_name' in meta and 'rank' in meta: - nodeid = f"{meta['service_name']}.{meta['rank']}" - else: - nodeid = self.daemon_id - - args += [action, nodeid] - - data_dir = get_data_dir(self.fsid, self.ctx.data_dir, - self.daemon_type, self.daemon_id) - volume_mounts = self.get_container_mounts(data_dir) - envs = self.get_container_envs() - - logger.info('Creating RADOS grace for action: %s' % action) - c = CephContainer( - self.ctx, - image=self.image, - entrypoint=entrypoint, - args=args, - volume_mounts=volume_mounts, - cname=self.get_container_name(desc='grace-%s' % action), - envs=envs - ) - return c - ################################## @@ -2749,11 +2712,6 @@ def deploy_daemon_units( memory_limit=ctx.memory_limit, ) _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate') - elif daemon_type == NFSGanesha.daemon_type: - # add nfs to the rados grace db - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) - prestart = nfs_ganesha.get_rados_grace_container('add') - _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace') elif daemon_type == CephIscsi.daemon_type: f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n') ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) @@ -2800,11 +2758,6 @@ def deploy_daemon_units( daemon_id), ) _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd') - elif daemon_type == NFSGanesha.daemon_type: - # remove nfs from the rados grace db - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) - poststop = nfs_ganesha.get_rados_grace_container('remove') - _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace') elif daemon_type == CephIscsi.daemon_type: # make sure we also stop the tcmu container ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index 4c0fcd9dfa4e..91fa3d5e5e9b 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -1,5 +1,8 @@ import errno import logging +import os +import subprocess +import tempfile from typing import Dict, Tuple, Any, List, cast, Optional from mgr_module import HandleCommandResult @@ -39,6 +42,9 @@ class NFSService(CephService): if daemon_id is not None: self.fence(daemon_id) del rank_map[rank] + nodeid = f'{spec.service_name()}.{rank}' + self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table') + self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid) self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map) else: max_gen = max(m.keys()) @@ -69,10 +75,16 @@ class NFSService(CephService): deps: List[str] = [] + nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}' + # create the RADOS recovery pool keyring rados_user = f'{daemon_type}.{daemon_id}' rados_keyring = self.create_keyring(daemon_spec) + # ensure rank is known to ganesha + self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table') + self.run_grace_tool(spec, 'add', nodeid) + # create the rados config object self.create_rados_config_obj(spec) @@ -84,7 +96,7 @@ class NFSService(CephService): def get_ganesha_conf() -> str: context = { "user": rados_user, - "nodeid": f'{daemon_spec.service_name}.{daemon_spec.rank}', + "nodeid": nodeid, "pool": spec.pool, "namespace": spec.namespace if spec.namespace else '', "rgw_user": rgw_user, @@ -172,6 +184,53 @@ class NFSService(CephService): return keyring + def run_grace_tool(self, + spec: NFSServiceSpec, + action: str, + nodeid: str) -> None: + # write a temp keyring and referencing config file. this is a kludge + # because the ganesha-grace-tool can only authenticate as a client (and + # not a mgr). Also, it doesn't allow you to pass a keyring location via + # the command line, nor does it parse the CEPH_ARGS env var. + tmp_id = f'mgr.nfs.grace.{spec.service_name()}' + entity = AuthEntity(f'client.{tmp_id}') + keyring = self.get_keyring_with_caps( + entity, + ['mon', 'allow r', 'osd', f'allow rwx pool {spec.pool}'] + ) + tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring') + os.fchmod(tmp_keyring.fileno(), 0o600) + tmp_keyring.write(keyring) + tmp_keyring.flush() + tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf') + tmp_conf.write(self.mgr.get_minimal_ceph_conf()) + tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n') + tmp_conf.flush() + try: + cmd: List[str] = [ + 'ganesha-rados-grace', + '--cephconf', tmp_conf.name, + '--userid', tmp_id, + '--pool', cast(str, spec.pool), + ] + if spec.namespace: + cmd += ['--ns', spec.namespace] + cmd += [action, nodeid] + self.mgr.log.debug(cmd) + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + timeout=10) + if result.returncode: + self.mgr.log.warning( + f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}' + ) + raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}') + + finally: + self.mgr.check_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + def remove_rgw_keyring(self, daemon: DaemonDescription) -> None: assert daemon.daemon_id is not None daemon_id: str = daemon.daemon_id diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index ba6c0addfe13..167fcc664b94 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -754,6 +754,7 @@ class TestCephadm(object): }) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock()) def test_nfs(self, cephadm_module): with with_host(cephadm_module, 'test'): @@ -915,6 +916,7 @@ class TestCephadm(object): @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test')) @mock.patch("subprocess.run", None) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'test'): with with_service(cephadm_module, spec, meth, 'test'):