mgr/cephadm: migrate all legacy nfs exports to new .nfs pool
author     Sage Weil <sage@newdream.net>
           Fri, 2 Jul 2021 19:53:15 +0000 (15:53 -0400)
committer  Sebastian Wagner <sewagner@redhat.com>
           Thu, 9 Sep 2021 14:17:55 +0000 (16:17 +0200)
Migrate all past NFS pools, whether they were created by mgr/nfs or by
the dashboard, to the new mgr/nfs .nfs pool.
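Concretely, any stored nfs service spec whose pool is not already '.nfs' gets migrated;
if a spec omits the legacy fields, the defaults hard-coded in queue_migrate_nfs_spec()
below are assumed. A rough, condensed sketch of the selection logic:

# condensed from queue_migrate_nfs_spec() / migrate_2_3() in the diff below
pool = spec.get('pool', 'nfs-ganesha')   # legacy default pool name assumed by this code
ns = spec.get('namespace', service_id)   # legacy per-cluster namespace default
if pool != '.nfs':                       # specs already on the new pool are skipped
    migration_queue.append([service_id, pool, ns])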

Since this migration relies on RADOS being available, we have to be a bit
careful here: we only attempt the migration from serve(), not during
module init.
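In code terms the ordering is roughly the following (a condensed sketch; the
serve()-side call site is paraphrased and is not part of this diff):

# At module init, Migrations.__init__ loads the queue but only runs the cheap steps:
self.nfs_migration_queue = json.loads(mgr.get_store('nfs_migration_queue') or '[]')
self.migrate(startup=True)        # migration 2 -> 3 is skipped while startup=True

# Later, once serve() is running and RADOS is reachable (call site paraphrased):
self.mgr.migration.migrate()      # startup defaults to False, so migrate_2_3() runs

# The queued work itself is persisted as a JSON list of [service_id, pool, namespace]
# triples under the 'nfs_migration_queue' store key, e.g. '[["foo", "mypool", "foons"]]'.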

After the exports are re-imported, we destroy existing ganesha daemons so
that new ones will get recreated.  This ensures the (new) daemons have
cephx keys to access the new pool.
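Roughly, the re-import and redeploy sequence is (a condensed sketch of
migrate_nfs_spec() below; read_legacy_exports() is a hypothetical stand-in for
the rados object iteration):

for export_text in read_legacy_exports(pool, ns):      # hypothetical helper
    # drop the old per-daemon credentials; 'nfs export apply' regenerates them
    cleaned = '\n'.join(line for line in export_text.splitlines()
                        if not line.lstrip().startswith(('user_id =', 'secret_access_key ='))) + '\n'
    mgr.mon_command({'prefix': 'nfs export apply', 'cluster_id': service_id}, inbuf=cleaned)

# remove the old ganesha daemons; cephadm then recreates them with cephx keys
# that grant access to the new .nfs pool
daemons = [d.name() for d in mgr.cache.get_daemons_by_service(f'nfs.{service_id}')]
mgr.remove_daemons(daemons)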

Note that no attempt is made to clean up the old NFS pools.  This is out
of paranoia: if something goes wrong, the old NFS configuration data will
still be there.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit c183bccdc6728e8321eec615b1acd88216acabca)

src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/migrations.py
src/pybind/mgr/cephadm/tests/test_migration.py

src/pybind/mgr/cephadm/inventory.py
index d0509b66124eea5118c764feb609f52fc1c4f271..cd398221123e549a581a17c2dcddf519893c374f 100644 (file)
@@ -14,6 +14,7 @@ from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
 
 from .utils import resolve_ip
+from .migrations import queue_migrate_nfs_spec
 
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
@@ -207,6 +208,12 @@ class SpecStore():
             service_name = k[len(SPEC_STORE_PREFIX):]
             try:
                 j = cast(Dict[str, dict], json.loads(v))
+                if (
+                        (self.mgr.migration_current or 0) < 3
+                        and j['spec'].get('service_type') == 'nfs'
+                ):
+                    self.mgr.log.debug(f'found legacy nfs spec {j}')
+                    queue_migrate_nfs_spec(self.mgr, j)
                 spec = ServiceSpec.from_json(j['spec'])
                 created = str_to_datetime(cast(str, j['created']))
                 self._specs[service_name] = spec
src/pybind/mgr/cephadm/migrations.py
index cf30d15c28e7eeadb6a184bb2a426923beaced44..f5c0e007b2ae738dc7a4ef925fd4e58e3033e570 100644 (file)
@@ -1,5 +1,6 @@
+import json
 import logging
-from typing import TYPE_CHECKING, Iterator
+from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any
 
 from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
 from cephadm.schedule import HostAssignment
@@ -9,7 +10,7 @@ from orchestrator import OrchestratorError, DaemonDescription
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
 
-LAST_MIGRATION = 2
+LAST_MIGRATION = 3
 
 logger = logging.getLogger(__name__)
 
@@ -29,12 +30,15 @@ class Migrations:
         # upgrade code, while an old upgrade is still in progress), naming of daemons,
         # fs-layout of the daemons, etc.
         if self.mgr.migration_current is None:
-            self.set(0)
+            self.set(LAST_MIGRATION)
+
+        v = mgr.get_store('nfs_migration_queue')
+        self.nfs_migration_queue = json.loads(v) if v else []
 
         # for some migrations, we don't need to do anything except for
         # setting migration_current = 1.
         # let's try to shortcut things here.
-        self.migrate()
+        self.migrate(True)
 
     def set(self, val: int) -> None:
         self.mgr.set_module_option('migration_current', val)
@@ -49,7 +53,7 @@ class Migrations:
             raise OrchestratorError(
                 "cephadm migration still ongoing. Please wait, until the migration is complete.")
 
-    def migrate(self) -> None:
+    def migrate(self, startup: bool = False) -> None:
         if self.mgr.migration_current == 0:
             if self.migrate_0_1():
                 self.set(1)
@@ -58,6 +62,10 @@ class Migrations:
             if self.migrate_1_2():
                 self.set(2)
 
+        if self.mgr.migration_current == 2 and not startup:
+            if self.migrate_2_3():
+                self.set(3)
+
     def migrate_0_1(self) -> bool:
         """
         Migration 0 -> 1
@@ -160,3 +168,80 @@ class Migrations:
             self.mgr.spec_store.finally_rm(old)
 
         return True
+
+    def migrate_2_3(self) -> bool:
+        if self.nfs_migration_queue:
+            from nfs.cluster import create_ganesha_pool
+
+            create_ganesha_pool(self.mgr)
+            for service_id, pool, ns in self.nfs_migration_queue:
+                if pool != '.nfs':
+                    self.migrate_nfs_spec(service_id, pool, ns)
+            self.nfs_migration_queue = []
+        return True
+
+    def migrate_nfs_spec(self, service_id: str, pool: str, ns: Optional[str]) -> None:
+        self.mgr.log.info(
+            f'Migrating nfs.{service_id} from legacy pool {pool} namespace {ns}'
+        )
+
+        # read exports
+        ioctx = self.mgr.rados.open_ioctx(pool)
+        if ns is not None:
+            ioctx.set_namespace(ns)
+        object_iterator = ioctx.list_objects()
+        exports = []
+        while True:
+            try:
+                obj = object_iterator.__next__()
+                if obj.key.startswith('export-'):
+                    self.mgr.log.debug(f'reading {obj.key}')
+                    exports.append(obj.read().decode())
+            except StopIteration:
+                break
+        self.mgr.log.info(f'Found {len(exports)} exports for legacy nfs.{service_id}')
+
+        # import exports
+        for export in exports:
+            ex = ''
+            for line in export.splitlines():
+                if (
+                        line.startswith('        secret_access_key =')
+                        or line.startswith('        user_id =')
+                ):
+                    continue
+                ex += line + '\n'
+            self.mgr.log.debug(f'importing export: {ex}')
+            ret, out, err = self.mgr.mon_command({
+                'prefix': 'nfs export apply',
+                'cluster_id': service_id
+            }, inbuf=ex)
+            if ret:
+                self.mgr.log.warning(f'Failed to migrate export ({ret}): {err}\nExport was:\n{ex}')
+
+        # redeploy all ganesha daemons to ensure that the daemons'
+        # cephx keys are correct AND container configs are set up properly
+        daemons = [d.name() for d in self.mgr.cache.get_daemons_by_service(f'nfs.{service_id}')]
+        self.mgr.log.info(f'Removing old daemons {daemons}')
+        self.mgr.remove_daemons(daemons)
+
+        # re-save service spec (without pool and namespace properties!)
+        spec = self.mgr.spec_store[f'nfs.{service_id}'].spec
+        self.mgr.spec_store.save(spec)
+
+
+def queue_migrate_nfs_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
+    """
+    After 16.2.5 we dropped the NFSServiceSpec pool and namespace properties.
+    Queue up a migration to process later, once we are sure that RADOS is available
+    and so on.
+    """
+    service_id = spec_dict['spec']['service_id']
+    args = spec_dict['spec'].get('spec', {})
+    pool = args.pop('pool', 'nfs-ganesha')
+    ns = args.pop('namespace', service_id)
+    queued = mgr.get_store('nfs_migration_queue') or '[]'
+    ls = json.loads(queued)
+    ls.append([service_id, pool, ns])
+    mgr.set_store('nfs_migration_queue', json.dumps(ls))
+    mgr.log.info(f'Queued nfs.{service_id} for migration')
src/pybind/mgr/cephadm/tests/test_migration.py
index 168e44e749c2789b2c0819c8c3ad59312c97c48c..d69628ea9067a6d4790760279038604b7af521ef 100644 (file)
@@ -48,7 +48,7 @@ def test_migrate_scheduler(cephadm_module: CephadmOrchestrator):
 
             cephadm_module.migration_current = 0
             cephadm_module.migration.migrate()
-            assert cephadm_module.migration_current == 2
+            assert cephadm_module.migration_current >= 2
 
             out = [o.spec.placement for o in wait(
                 cephadm_module, cephadm_module.describe_service())]
@@ -78,7 +78,7 @@ def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator):
 
         cephadm_module.migration_current = 1
         cephadm_module.migration.migrate()
-        assert cephadm_module.migration_current == 2
+        assert cephadm_module.migration_current >= 2
 
         assert len(cephadm_module.spec_store.all_specs) == 1
         assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
@@ -121,7 +121,7 @@ def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator):
 
         cephadm_module.migration_current = 1
         cephadm_module.migration.migrate()
-        assert cephadm_module.migration_current == 2
+        assert cephadm_module.migration_current >= 2
 
         assert len(cephadm_module.spec_store.all_specs) == 1
         assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
@@ -149,3 +149,36 @@ def test_migrate_service_id_mds_one(cephadm_module: CephadmOrchestrator):
 
         # there is nothing to migrate, as the spec is gone now.
         assert len(cephadm_module.spec_store.all_specs) == 0
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator):
+    with with_host(cephadm_module, 'host1'):
+        cephadm_module.set_store(
+            SPEC_STORE_PREFIX + 'mds',
+            json.dumps({
+                'spec': {
+                    'service_type': 'nfs',
+                    'service_id': 'foo',
+                    'placement': {
+                        'hosts': ['host1']
+                    },
+                    'spec': {
+                        'pool': 'mypool',
+                        'namespace': 'foons',
+                    },
+                },
+                'created': datetime_to_str(datetime_now()),
+            }, sort_keys=True),
+        )
+        cephadm_module.migration_current = 1
+        cephadm_module.spec_store.load()
+
+        ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+        assert ls == [['foo', 'mypool', 'foons']]
+
+        cephadm_module.migration.migrate(True)
+        assert cephadm_module.migration_current == 2
+
+        cephadm_module.migration.migrate()
+        assert cephadm_module.migration_current == 3