mgr/cephadm: enable ranked daemons for nfs
author     Sage Weil <sage@newdream.net>
           Fri, 23 Apr 2021 19:33:23 +0000 (15:33 -0400)
committer  Sage Weil <sage@newdream.net>
           Wed, 19 May 2021 12:43:14 +0000 (08:43 -0400)
Use ranked daemons for NFS. Ganesha does not like it if multiple
instances start up with the same rank, but we need stable ranks so that
a rank can "fail over" to a new daemon instance on another host (with
the same rank) for NFS client reclaim to work.

Specify a nodeid of '{service_name}.{rank}' for Ganesha.

Include a unique id in the daemon_id because it avoids some issues with
create/destroy ordering, and because the daemon_id no longer matters
much now that we are using a stable rank.
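
As an illustration (service name and hosts invented), the stable nodeid
means two successive daemons serving the same rank present the same
identity to Ganesha:

    service_name = 'nfs.foo'
    rank = 0
    # first instance on hostA, replacement on hostB; their random
    # daemon_id suffixes differ, but the nodeid they register does not:
    nodeid = f'{service_name}.{rank}'  # -> 'nfs.foo.0' for both daemons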

Signed-off-by: Sage Weil <sage@newdream.net>
src/cephadm/cephadm
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/nfs.py

index ed92c0b017a23dbc442d6729d552f3617a0b18a7..4e1e12e11e4b6278926312fc91550c815e2efab4 100755 (executable)
@@ -503,7 +503,14 @@ class NFSGanesha(object):
             args += ['--ns', self.namespace]
         if self.userid:
             args += ['--userid', self.userid]
-        args += [action, self.get_daemon_name()]
+
+        meta = json.loads(self.ctx.meta_json)
+        if 'service_name' in meta and 'rank' in meta:
+            nodeid = f"{meta['service_name']}.{meta['rank']}"
+        else:
+            nodeid = self.daemon_id
+
+        args += [action, nodeid]
 
         data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
                                 self.daemon_type, self.daemon_id)
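
A sketch of the metadata this hunk consumes via ctx.meta_json (the
payload shape beyond the 'service_name' and 'rank' keys used above is
an assumption):

    import json

    # hypothetical meta-json payload written by the orchestrator
    meta = json.loads('{"service_name": "nfs.foo", "rank": 0}')
    nodeid = f"{meta['service_name']}.{meta['rank']}"  # -> "nfs.foo.0"
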
index 07450bfaaf5e5bc00ef690796efc7e4ffbdd761c..62bf72f4e1b1f22ce8cc0f359af1dad01f54e1ef 100644 (file)
@@ -584,7 +584,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         Generate a unique random service name
         """
         suffix = daemon_type not in [
-            'mon', 'crash', 'nfs',
+            'mon', 'crash',
             'prometheus', 'node-exporter', 'grafana', 'alertmanager',
             'container', 'cephadm-exporter',
         ]
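
With 'nfs' removed from this list, NFS daemon ids now carry a random
suffix like most other daemon types. A minimal sketch of such a suffix
generator (the real helper's name and suffix length are assumptions):

    import random
    import string

    def random_suffix(length: int = 6) -> str:
        # e.g. 'qwivxz'; a fresh suffix per deployment means repeated
        # create/destroy cycles never collide on the same daemon_id
        return ''.join(random.choice(string.ascii_lowercase)
                       for _ in range(length))
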
index e5773bfb1942304b7b8661d1daa17035450efb27..4e4e0153e8d51507d8082318623813903bfe00d1 100644 (file)
@@ -4,7 +4,7 @@ from typing import Dict, Tuple, Any, List, cast, Optional
 
 from mgr_module import HandleCommandResult
 
-from ceph.deployment.service_spec import NFSServiceSpec
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec
 import rados
 
 from orchestrator import DaemonDescription
@@ -17,6 +17,38 @@ logger = logging.getLogger(__name__)
 class NFSService(CephService):
     TYPE = 'nfs'
 
+    def ranked(self) -> bool:
+        return True
+
+    def fence(self, daemon_id: str) -> None:
+        logger.info(f'Fencing old nfs.{daemon_id}')
+        ret, out, err = self.mgr.mon_command({
+            'prefix': 'auth rm',
+            'entity': f'client.nfs.{daemon_id}',
+        })
+
+        # TODO: block/fence this entity (in case it is still running somewhere)
+
+    def fence_old_ranks(self,
+                        spec: ServiceSpec,
+                        rank_map: Dict[int, Dict[int, Optional[str]]],
+                        num_ranks: int) -> None:
+        for rank, m in list(rank_map.items()):
+            if rank >= num_ranks:
+                for daemon_id in m.values():
+                    if daemon_id is not None:
+                        self.fence(daemon_id)
+                del rank_map[rank]
+                self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+            else:
+                max_gen = max(m.keys())
+                for gen, daemon_id in list(m.items()):
+                    if gen < max_gen:
+                        if daemon_id is not None:
+                            self.fence(daemon_id)
+                        del rank_map[rank][gen]
+                        self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+
     def config(self, spec: NFSServiceSpec, daemon_id: str) -> None:  # type: ignore
         assert self.TYPE == spec.service_type
         assert spec.pool
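
For reference, the rank_map walked by fence_old_ranks() maps rank ->
generation -> daemon_id, with None standing in for a daemon that is
still being created. A hypothetical two-rank map (daemon ids invented):

    rank_map = {
        0: {0: 'foo.0.0.host1.abcdef',   # stale generation: fenced, dropped
            1: 'foo.0.1.host2.ghijkl'},  # newest generation: kept
        1: {0: 'foo.1.0.host3.mnopqr'},  # whole rank fenced if num_ranks < 2
    }

Fencing currently just removes the daemon's cephx key so a stale
instance can no longer authenticate; the TODO above notes that actively
blocking a still-running instance remains future work.
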
@@ -51,7 +83,7 @@ class NFSService(CephService):
         # generate the ganesha config
         def get_ganesha_conf() -> str:
             context = dict(user=rados_user,
-                           nodeid=daemon_spec.name(),
+                           nodeid=f'{daemon_spec.service_name}.{daemon_spec.rank}',
                            pool=spec.pool,
                            namespace=spec.namespace if spec.namespace else '',
                            rgw_user=rgw_user,
@@ -141,7 +173,7 @@ class NFSService(CephService):
         entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')
 
         logger.info(f'Removing key for {entity}')
-        ret, out, err = self.mgr.check_mon_command({
+        self.mgr.check_mon_command({
             'prefix': 'auth rm',
             'entity': entity,
         })