git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: reimplement ceph.conf pushing
author: Sage Weil <sage@newdream.net>
Tue, 20 Apr 2021 22:19:26 +0000 (18:19 -0400)
committer: Sage Weil <sage@newdream.net>
Tue, 4 May 2021 16:22:15 +0000 (11:22 -0500)
Use a more generic inventory map of paths to digests to track what we've
pushed.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit 77411e7df313f1488e9636c871973818474f9401)

src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/test_cephadm.py

index 89fe71584a6377197139ffb94c92a1fcbdf5fddf..5826abd79349c4e605724f89075a307454bb63ad 100644 (file)
@@ -247,10 +247,10 @@ class HostCache():
 
     This is needed in order to deploy MONs. As this is mostly read-only.
 
-    4. `last_etc_ceph_ceph_conf` O(hosts)
+    4. `last_client_files` O(hosts)
 
-    Stores the last refresh time for the /etc/ceph/ceph.conf. Used
-    to avoid deploying new configs when failing over to a new mgr.
+    Stores the last digest and owner/mode for files we've pushed to /etc/ceph
+    (ceph.conf or client keyrings).
 
     5. `scheduled_daemon_actions`: O(daemons)
 
@@ -280,7 +280,7 @@ class HostCache():
         self.daemon_config_deps = {}   # type: Dict[str, Dict[str, Dict[str,Any]]]
         self.last_host_check = {}      # type: Dict[str, datetime.datetime]
         self.loading_osdspec_preview = set()  # type: Set[str]
-        self.last_etc_ceph_ceph_conf: Dict[str, datetime.datetime] = {}
+        self.last_client_files: Dict[str, Dict[str, Tuple[str, int, int, int]]] = {}
         self.registry_login_queue: Set[str] = set()
 
         self.scheduled_daemon_actions: Dict[str, Dict[str, str]] = {}
@@ -317,6 +317,7 @@ class HostCache():
                     self.devices[host].append(inventory.Device.from_json(d))
                 self.networks[host] = j.get('networks_and_interfaces', {})
                 self.osdspec_previews[host] = j.get('osdspec_previews', {})
+                self.last_client_files[host] = j.get('last_client_files', {})
                 for name, ts in j.get('osdspec_last_applied', {}).items():
                     self.osdspec_last_applied[host][name] = str_to_datetime(ts)
 
@@ -327,9 +328,6 @@ class HostCache():
                     }
                 if 'last_host_check' in j:
                     self.last_host_check[host] = str_to_datetime(j['last_host_check'])
-                if 'last_etc_ceph_ceph_conf' in j:
-                    self.last_etc_ceph_ceph_conf[host] = str_to_datetime(
-                        j['last_etc_ceph_ceph_conf'])
                 self.registry_login_queue.add(host)
                 self.scheduled_daemon_actions[host] = j.get('scheduled_daemon_actions', {})
 
@@ -394,6 +392,24 @@ class HostCache():
         # type: (str, str, datetime.datetime) -> None
         self.osdspec_last_applied[host][service_name] = ts
 
+    def update_client_file(self,
+                           host: str,
+                           path: str,
+                           digest: str,
+                           mode: int,
+                           uid: int,
+                           gid: int) -> None:
+        if host not in self.last_client_files:
+            self.last_client_files[host] = {}
+        self.last_client_files[host][path] = (digest, mode, uid, gid)
+
+    def removed_client_file(self, host: str, path: str) -> None:
+        if (
+            host in self.last_client_files
+            and path in self.last_client_files[host]
+        ):
+            del self.last_client_files[host][path]
+
     def prime_empty_host(self, host):
         # type: (str) -> None
         """
@@ -409,6 +425,7 @@ class HostCache():
         self.device_refresh_queue.append(host)
         self.osdspec_previews_refresh_queue.append(host)
         self.registry_login_queue.add(host)
+        self.last_client_files[host] = {}
 
     def invalidate_host_daemons(self, host):
         # type: (str) -> None
@@ -464,8 +481,8 @@ class HostCache():
         if host in self.last_host_check:
             j['last_host_check'] = datetime_to_str(self.last_host_check[host])
 
-        if host in self.last_etc_ceph_ceph_conf:
-            j['last_etc_ceph_ceph_conf'] = datetime_to_str(self.last_etc_ceph_ceph_conf[host])
+        if host in self.last_client_files:
+            j['last_client_files'] = self.last_client_files[host]
         if host in self.scheduled_daemon_actions:
             j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host]
 
@@ -499,6 +516,8 @@ class HostCache():
             del self.daemon_config_deps[host]
         if host in self.scheduled_daemon_actions:
             del self.scheduled_daemon_actions[host]
+        if host in self.last_client_files:
+            del self.last_client_files[host]
         self.mgr.set_store(HOST_CACHE_PREFIX + host, None)
 
     def get_hosts(self):
@@ -587,6 +606,9 @@ class HostCache():
                     self.daemon_config_deps[host][name].get('last_config', None)
         return None, None
 
+    def get_host_client_files(self, host: str) -> Dict[str, Tuple[str, int, int, int]]:
+        return self.last_client_files.get(host, {})
+
     def host_needs_daemon_refresh(self, host):
         # type: (str) -> bool
         if host in self.mgr.offline_hosts:
@@ -653,24 +675,6 @@ class HostCache():
             seconds=self.mgr.host_check_interval)
         return host not in self.last_host_check or self.last_host_check[host] < cutoff
 
-    def host_needs_new_etc_ceph_ceph_conf(self, host: str) -> bool:
-        if not self.mgr.manage_etc_ceph_ceph_conf:
-            return False
-        if self.mgr.paused:
-            return False
-        if host in self.mgr.offline_hosts:
-            return False
-        if not self.mgr.last_monmap:
-            return False
-        if host not in self.last_etc_ceph_ceph_conf:
-            return True
-        if self.mgr.last_monmap > self.last_etc_ceph_ceph_conf[host]:
-            return True
-        if self.mgr.extra_ceph_conf_is_newer(self.last_etc_ceph_ceph_conf[host]):
-            return True
-        # already up to date:
-        return False
-
     def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool:
         if (
             host not in self.devices
@@ -685,11 +689,6 @@ class HostCache():
             return True
         return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host]
 
-    def update_last_etc_ceph_ceph_conf(self, host: str) -> None:
-        if not self.mgr.last_monmap:
-            return
-        self.last_etc_ceph_ceph_conf[host] = datetime_now()
-
     def host_needs_registry_login(self, host: str) -> bool:
         if host in self.mgr.offline_hosts:
             return False
index 29589c26d0bec11e700cc496fd7f0f77d6e0898d..d9c03900dbea1f4b8c121602b6d934ab08aa7fbe 100644 (file)
@@ -131,7 +131,14 @@ class CephadmServe:
         bad_hosts = []
         failures = []
 
-        etc_ceph_ceph_conf_hosts = []
+        # host -> path -> (mode, uid, gid, content, digest)
+        client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]] = {}
+
+        # ceph.conf
+        if self.mgr.manage_etc_ceph_ceph_conf:
+            config = self.mgr.get_minimal_ceph_conf().encode('utf-8')
+            config_digest = ''.join('%02x' % c for c in hashlib.sha256(config).digest())
+
         if self.mgr.manage_etc_ceph_ceph_conf:
             try:
                 pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
@@ -142,7 +149,12 @@ class CephadmServe:
                     networks=self.mgr.cache.networks,
                 )
                 all_slots, _, _ = ha.place()
-                etc_ceph_ceph_conf_hosts = [s.hostname for s in all_slots]
+                for host in {s.hostname for s in all_slots}:
+                    if host not in client_files:
+                        client_files[host] = {}
+                    client_files[host]['/etc/ceph/ceph.conf'] = (
+                        0o644, 0, 0, bytes(config), str(config_digest)
+                    )
             except Exception as e:
                 self.mgr.log.warning(f'unable to calc conf hosts: {self.mgr.manage_etc_ceph_ceph_conf_hosts}: {e}')
 
@@ -188,14 +200,31 @@ class CephadmServe:
                 if r:
                     failures.append(r)
 
-            if (
-                host in etc_ceph_ceph_conf_hosts
-                and self.mgr.cache.host_needs_new_etc_ceph_ceph_conf(host)
-            ):
-                self.log.debug(f"deploying new /etc/ceph/ceph.conf on `{host}`")
-                r = self._deploy_etc_ceph_ceph_conf(host)
-                if r:
-                    bad_hosts.append(r)
+            # client files
+            updated_files = False
+            old_files = self.mgr.cache.get_host_client_files(host).copy()
+            for path, m in client_files.get(host, {}).items():
+                mode, uid, gid, content, digest = m
+                if path in old_files:
+                    match = old_files[path] == (digest, mode, uid, gid)
+                    del old_files[path]
+                    if match:
+                        continue
+                self.log.info(f'Updating {host}:{path}')
+                self._write_remote_file(host, path, content, mode, uid, gid)
+                self.mgr.cache.update_client_file(host, path, digest, mode, uid, gid)
+                updated_files = True
+            for path in old_files.keys():
+                self.log.info(f'Removing {host}:{path}')
+                with self._remote_connection(host) as tpl:
+                    conn, connr = tpl
+                    out, err, code = remoto.process.check(
+                        conn,
+                        ['rm', '-f', path])
+                updated_files = True
+                self.mgr.cache.removed_client_file(host, path)
+            if updated_files:
+                self.mgr.cache.save_host(host)
 
         refresh(self.mgr.cache.get_hosts())
 
@@ -382,18 +411,6 @@ class CephadmServe:
         # Unset global 'pending' flag for host
         self.mgr.cache.loading_osdspec_preview.remove(search_host)
 
-    def _deploy_etc_ceph_ceph_conf(self, host: str) -> Optional[str]:
-        config = self.mgr.get_minimal_ceph_conf()
-
-        try:
-            self._write_remote_file(host, '/etc/ceph/ceph.conf',
-                                    config.encode('utf-8'), 0o644, 0, 0)
-            self.mgr.cache.update_last_etc_ceph_ceph_conf(host)
-            self.mgr.cache.save_host(host)
-        except OrchestratorError as e:
-            return f'failed to create /etc/ceph/ceph.conf on {host}: {str(e)}'
-        return None
-
     def _check_for_strays(self) -> None:
         self.log.debug('_check_for_strays')
         for k in ['CEPHADM_STRAY_HOST',
index ab37d9ead6349e2b81566b695af62db36eb63f44..e15fe409d2339a81c55bda8156947538725a9497 100644 (file)
@@ -1067,7 +1067,7 @@ class TestCephadm(object):
         assert cephadm_module.manage_etc_ceph_ceph_conf is False
 
         with with_host(cephadm_module, 'test'):
-            assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test')
+            assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test')
 
         with with_host(cephadm_module, 'test'):
             cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True)
@@ -1078,7 +1078,7 @@ class TestCephadm(object):
             _write_file.assert_called_with('test', '/etc/ceph/ceph.conf', b'',
                                            0o644, 0, 0)
 
-            assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test')
+            assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test')
 
             # set extra config and expect that we deploy another ceph.conf
             cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
@@ -1087,21 +1087,17 @@ class TestCephadm(object):
                                            b'\n\n[mon]\nk=v\n', 0o644, 0, 0)
 
             # reload
-            cephadm_module.cache.last_etc_ceph_ceph_conf = {}
+            cephadm_module.cache.last_client_files = {}
             cephadm_module.cache.load()
 
-            assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test')
+            assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test')
 
             # Make sure, _check_daemons does a redeploy due to monmap change:
-            cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
-                'modified': datetime_to_str(datetime_now()),
-                'fsid': 'foobar',
-            })
-            cephadm_module.notify('mon_map', mock.MagicMock())
-            assert cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test')
-            cephadm_module.cache.last_etc_ceph_ceph_conf = {}
-            cephadm_module.cache.load()
-            assert cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test')
+            before_digest = cephadm_module.cache.get_host_client_files('test')['/etc/ceph/ceph.conf'][0]
+            cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2')
+            CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+            after_digest = cephadm_module.cache.get_host_client_files('test')['/etc/ceph/ceph.conf'][0]
+            assert before_digest != after_digest
 
     def test_etc_ceph_init(self):
         with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m: