git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: nfs: add rank to grace file from mgr module
author: Sage Weil <sage@newdream.net>
Mon, 26 Apr 2021 18:48:03 +0000 (14:48 -0400)
committer: Sage Weil <sage@newdream.net>
Thu, 3 Jun 2021 12:37:20 +0000 (07:37 -0500)
Do the grace file manipulation from the mgr module.  For add, this isn't
especially important, but for remove it is very important.  Clean out
old ranks from the grace table before we record that the rank has been
purged from the rank_map.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit 74169045a9b212d4b549159a8f7870ed6e9119ea)

src/cephadm/cephadm
src/pybind/mgr/cephadm/services/nfs.py
src/pybind/mgr/cephadm/tests/test_cephadm.py

index b218e6d29a2e0832c9f401b16dc2202236545aca..aac7331515b069f81804c517bdd38787f309b65f 100755 (executable)
@@ -505,43 +505,6 @@ class NFSGanesha(object):
                 os.fchown(f.fileno(), uid, gid)
                 f.write(self.rgw.get('keyring', ''))
 
-    def get_rados_grace_container(self, action):
-        # type: (str) -> CephContainer
-        """Container for a ganesha action on the grace db"""
-        entrypoint = '/usr/bin/ganesha-rados-grace'
-
-        assert self.pool
-        args = ['--pool', self.pool]
-        if self.namespace:
-            args += ['--ns', self.namespace]
-        if self.userid:
-            args += ['--userid', self.userid]
-
-        meta = json.loads(self.ctx.meta_json)
-        if 'service_name' in meta and 'rank' in meta:
-            nodeid = f"{meta['service_name']}.{meta['rank']}"
-        else:
-            nodeid = self.daemon_id
-
-        args += [action, nodeid]
-
-        data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
-                                self.daemon_type, self.daemon_id)
-        volume_mounts = self.get_container_mounts(data_dir)
-        envs = self.get_container_envs()
-
-        logger.info('Creating RADOS grace for action: %s' % action)
-        c = CephContainer(
-            self.ctx,
-            image=self.image,
-            entrypoint=entrypoint,
-            args=args,
-            volume_mounts=volume_mounts,
-            cname=self.get_container_name(desc='grace-%s' % action),
-            envs=envs
-        )
-        return c
-
 ##################################
 
 
@@ -2749,11 +2712,6 @@ def deploy_daemon_units(
                     memory_limit=ctx.memory_limit,
                 )
                 _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
-        elif daemon_type == NFSGanesha.daemon_type:
-            # add nfs to the rados grace db
-            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
-            prestart = nfs_ganesha.get_rados_grace_container('add')
-            _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace')
         elif daemon_type == CephIscsi.daemon_type:
             f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
             ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
@@ -2800,11 +2758,6 @@ def deploy_daemon_units(
                                                     daemon_id),
             )
             _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
-        elif daemon_type == NFSGanesha.daemon_type:
-            # remove nfs from the rados grace db
-            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
-            poststop = nfs_ganesha.get_rados_grace_container('remove')
-            _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace')
         elif daemon_type == CephIscsi.daemon_type:
             # make sure we also stop the tcmu container
             ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
index 4c0fcd9dfa4ebeb8813b47367d6c19708538b7e6..91fa3d5e5e9bbe309f7fae29cd3c1df8e9251331 100644 (file)
@@ -1,5 +1,8 @@
 import errno
 import logging
+import os
+import subprocess
+import tempfile
 from typing import Dict, Tuple, Any, List, cast, Optional
 
 from mgr_module import HandleCommandResult
@@ -39,6 +42,9 @@ class NFSService(CephService):
                     if daemon_id is not None:
                         self.fence(daemon_id)
                 del rank_map[rank]
+                nodeid = f'{spec.service_name()}.{rank}'
+                self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
+                self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
                 self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
             else:
                 max_gen = max(m.keys())
@@ -69,10 +75,16 @@ class NFSService(CephService):
 
         deps: List[str] = []
 
+        nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'
+
         # create the RADOS recovery pool keyring
         rados_user = f'{daemon_type}.{daemon_id}'
         rados_keyring = self.create_keyring(daemon_spec)
 
+        # ensure rank is known to ganesha
+        self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table')
+        self.run_grace_tool(spec, 'add', nodeid)
+
         # create the rados config object
         self.create_rados_config_obj(spec)
 
@@ -84,7 +96,7 @@ class NFSService(CephService):
         def get_ganesha_conf() -> str:
             context = {
                 "user": rados_user,
-                "nodeid": f'{daemon_spec.service_name}.{daemon_spec.rank}',
+                "nodeid": nodeid,
                 "pool": spec.pool,
                 "namespace": spec.namespace if spec.namespace else '',
                 "rgw_user": rgw_user,
@@ -172,6 +184,53 @@ class NFSService(CephService):
 
         return keyring
 
+    def run_grace_tool(self,
+                       spec: NFSServiceSpec,
+                       action: str,
+                       nodeid: str) -> None:
+        # write a temp keyring and referencing config file.  this is a kludge
+        # because the ganesha-grace-tool can only authenticate as a client (and
+        # not a mgr).  Also, it doesn't allow you to pass a keyring location via
+        # the command line, nor does it parse the CEPH_ARGS env var.
+        tmp_id = f'mgr.nfs.grace.{spec.service_name()}'
+        entity = AuthEntity(f'client.{tmp_id}')
+        keyring = self.get_keyring_with_caps(
+            entity,
+            ['mon', 'allow r', 'osd', f'allow rwx pool {spec.pool}']
+        )
+        tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring')
+        os.fchmod(tmp_keyring.fileno(), 0o600)
+        tmp_keyring.write(keyring)
+        tmp_keyring.flush()
+        tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf')
+        tmp_conf.write(self.mgr.get_minimal_ceph_conf())
+        tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n')
+        tmp_conf.flush()
+        try:
+            cmd: List[str] = [
+                'ganesha-rados-grace',
+                '--cephconf', tmp_conf.name,
+                '--userid', tmp_id,
+                '--pool', cast(str, spec.pool),
+            ]
+            if spec.namespace:
+                cmd += ['--ns', spec.namespace]
+            cmd += [action, nodeid]
+            self.mgr.log.debug(cmd)
+            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                    timeout=10)
+            if result.returncode:
+                self.mgr.log.warning(
+                    f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}'
+                )
+                raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}')
+
+        finally:
+            self.mgr.check_mon_command({
+                'prefix': 'auth rm',
+                'entity': entity,
+            })
+
     def remove_rgw_keyring(self, daemon: DaemonDescription) -> None:
         assert daemon.daemon_id is not None
         daemon_id: str = daemon.daemon_id
index ba6c0addfe139ffc53095fcc7b44d0407c5adfb8..167fcc664b941ff7a9e786aff14660a2a7aa948d 100644 (file)
@@ -754,6 +754,7 @@ class TestCephadm(object):
                 })
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
     @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock())
     def test_nfs(self, cephadm_module):
         with with_host(cephadm_module, 'test'):
@@ -915,6 +916,7 @@ class TestCephadm(object):
     @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test'))
     @mock.patch("subprocess.run", None)
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
     def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'test'):
             with with_service(cephadm_module, spec, meth, 'test'):