From 77411e7df313f1488e9636c871973818474f9401 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 20 Apr 2021 18:19:26 -0400 Subject: [PATCH] mgr/cephadm: reimplement ceph.conf pushing Use a more generic inventory map of paths to digests to track what we've pushed. Signed-off-by: Sage Weil --- src/pybind/mgr/cephadm/inventory.py | 63 ++++++++++---------- src/pybind/mgr/cephadm/serve.py | 61 ++++++++++++------- src/pybind/mgr/cephadm/tests/test_cephadm.py | 22 +++---- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index df9381a3dcd..a31e02e74ac 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -247,10 +247,10 @@ class HostCache(): This is needed in order to deploy MONs. As this is mostly read-only. - 4. `last_etc_ceph_ceph_conf` O(hosts) + 4. `last_client_files` O(hosts) - Stores the last refresh time for the /etc/ceph/ceph.conf. Used - to avoid deploying new configs when failing over to a new mgr. + Stores the last digest and owner/mode for files we've pushed to /etc/ceph + (ceph.conf or client keyrings). 5. `scheduled_daemon_actions`: O(daemons) @@ -280,7 +280,7 @@ class HostCache(): self.daemon_config_deps = {} # type: Dict[str, Dict[str, Dict[str,Any]]] self.last_host_check = {} # type: Dict[str, datetime.datetime] self.loading_osdspec_preview = set() # type: Set[str] - self.last_etc_ceph_ceph_conf: Dict[str, datetime.datetime] = {} + self.last_client_files: Dict[str, Dict[str, Tuple[str, int, int, int]]] = {} self.registry_login_queue: Set[str] = set() self.scheduled_daemon_actions: Dict[str, Dict[str, str]] = {} @@ -317,6 +317,7 @@ class HostCache(): self.devices[host].append(inventory.Device.from_json(d)) self.networks[host] = j.get('networks_and_interfaces', {}) self.osdspec_previews[host] = j.get('osdspec_previews', {}) + self.last_client_files[host] = j.get('last_client_files', {}) for name, ts in j.get('osdspec_last_applied', {}).items(): self.osdspec_last_applied[host][name] = str_to_datetime(ts) @@ -327,9 +328,6 @@ class HostCache(): } if 'last_host_check' in j: self.last_host_check[host] = str_to_datetime(j['last_host_check']) - if 'last_etc_ceph_ceph_conf' in j: - self.last_etc_ceph_ceph_conf[host] = str_to_datetime( - j['last_etc_ceph_ceph_conf']) self.registry_login_queue.add(host) self.scheduled_daemon_actions[host] = j.get('scheduled_daemon_actions', {}) @@ -394,6 +392,24 @@ class HostCache(): # type: (str, str, datetime.datetime) -> None self.osdspec_last_applied[host][service_name] = ts + def update_client_file(self, + host: str, + path: str, + digest: str, + mode: int, + uid: int, + gid: int) -> None: + if host not in self.last_client_files: + self.last_client_files[host] = {} + self.last_client_files[host][path] = (digest, mode, uid, gid) + + def removed_client_file(self, host: str, path: str) -> None: + if ( + host in self.last_client_files + and path in self.last_client_files[host] + ): + del self.last_client_files[host][path] + def prime_empty_host(self, host): # type: (str) -> None """ @@ -409,6 +425,7 @@ class HostCache(): self.device_refresh_queue.append(host) self.osdspec_previews_refresh_queue.append(host) self.registry_login_queue.add(host) + self.last_client_files[host] = {} def invalidate_host_daemons(self, host): # type: (str) -> None @@ -464,8 +481,8 @@ class HostCache(): if host in self.last_host_check: j['last_host_check'] = datetime_to_str(self.last_host_check[host]) - if host in self.last_etc_ceph_ceph_conf: - j['last_etc_ceph_ceph_conf'] = datetime_to_str(self.last_etc_ceph_ceph_conf[host]) + if host in self.last_client_files: + j['last_client_files'] = self.last_client_files[host] if host in self.scheduled_daemon_actions: j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host] @@ -499,6 +516,8 @@ class HostCache(): del self.daemon_config_deps[host] if host in self.scheduled_daemon_actions: del self.scheduled_daemon_actions[host] + if host in self.last_client_files: + del self.last_client_files[host] self.mgr.set_store(HOST_CACHE_PREFIX + host, None) def get_hosts(self): @@ -582,6 +601,9 @@ class HostCache(): self.daemon_config_deps[host][name].get('last_config', None) return None, None + def get_host_client_files(self, host: str) -> Dict[str, Tuple[str, int, int, int]]: + return self.last_client_files.get(host, {}) + def host_needs_daemon_refresh(self, host): # type: (str) -> bool if host in self.mgr.offline_hosts: @@ -648,24 +670,6 @@ class HostCache(): seconds=self.mgr.host_check_interval) return host not in self.last_host_check or self.last_host_check[host] < cutoff - def host_needs_new_etc_ceph_ceph_conf(self, host: str) -> bool: - if not self.mgr.manage_etc_ceph_ceph_conf: - return False - if self.mgr.paused: - return False - if host in self.mgr.offline_hosts: - return False - if not self.mgr.last_monmap: - return False - if host not in self.last_etc_ceph_ceph_conf: - return True - if self.mgr.last_monmap > self.last_etc_ceph_ceph_conf[host]: - return True - if self.mgr.extra_ceph_conf_is_newer(self.last_etc_ceph_ceph_conf[host]): - return True - # already up to date: - return False - def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool: if ( host not in self.devices @@ -680,11 +684,6 @@ class HostCache(): return True return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host] - def update_last_etc_ceph_ceph_conf(self, host: str) -> None: - if not self.mgr.last_monmap: - return - self.last_etc_ceph_ceph_conf[host] = datetime_now() - def host_needs_registry_login(self, host: str) -> bool: if host in self.mgr.offline_hosts: return False diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 5db1207f91a..e0028394b4a 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -131,7 +131,14 @@ class CephadmServe: bad_hosts = [] failures = [] - etc_ceph_ceph_conf_hosts = [] + # host -> path -> (mode, uid, gid, content, digest) + client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]] = {} + + # ceph.conf + if self.mgr.manage_etc_ceph_ceph_conf: + config = self.mgr.get_minimal_ceph_conf().encode('utf-8') + config_digest = ''.join('%02x' % c for c in hashlib.sha256(config).digest()) + if self.mgr.manage_etc_ceph_ceph_conf: try: pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts) @@ -142,7 +149,12 @@ class CephadmServe: networks=self.mgr.cache.networks, ) all_slots, _, _ = ha.place() - etc_ceph_ceph_conf_hosts = [s.hostname for s in all_slots] + for host in {s.hostname for s in all_slots}: + if host not in client_files: + client_files[host] = {} + client_files[host]['/etc/ceph/ceph.conf'] = ( + 0o644, 0, 0, bytes(config), str(config_digest) + ) except Exception as e: self.mgr.log.warning(f'unable to calc conf hosts: {self.mgr.manage_etc_ceph_ceph_conf_hosts}: {e}') @@ -188,14 +200,31 @@ class CephadmServe: if r: failures.append(r) - if ( - host in etc_ceph_ceph_conf_hosts - and self.mgr.cache.host_needs_new_etc_ceph_ceph_conf(host) - ): - self.log.debug(f"deploying new /etc/ceph/ceph.conf on `{host}`") - r = self._deploy_etc_ceph_ceph_conf(host) - if r: - bad_hosts.append(r) + # client files + updated_files = False + old_files = self.mgr.cache.get_host_client_files(host).copy() + for path, m in client_files.get(host, {}).items(): + mode, uid, gid, content, digest = m + if path in old_files: + match = old_files[path] == (digest, mode, uid, gid) + del old_files[path] + if match: + continue + self.log.info(f'Updating {host}:{path}') + self._write_remote_file(host, path, content, mode, uid, gid) + self.mgr.cache.update_client_file(host, path, digest, mode, uid, gid) + updated_files = True + for path in old_files.keys(): + self.log.info(f'Removing {host}:{path}') + with self._remote_connection(host) as tpl: + conn, connr = tpl + out, err, code = remoto.process.check( + conn, + ['rm', '-f', path]) + updated_files = True + self.mgr.cache.removed_client_file(host, path) + if updated_files: + self.mgr.cache.save_host(host) refresh(self.mgr.cache.get_hosts()) @@ -382,18 +411,6 @@ class CephadmServe: # Unset global 'pending' flag for host self.mgr.cache.loading_osdspec_preview.remove(search_host) - def _deploy_etc_ceph_ceph_conf(self, host: str) -> Optional[str]: - config = self.mgr.get_minimal_ceph_conf() - - try: - self._write_remote_file(host, '/etc/ceph/ceph.conf', - config.encode('utf-8'), 0o644, 0, 0) - self.mgr.cache.update_last_etc_ceph_ceph_conf(host) - self.mgr.cache.save_host(host) - except OrchestratorError as e: - return f'failed to create /etc/ceph/ceph.conf on {host}: {str(e)}' - return None - def _check_for_strays(self) -> None: self.log.debug('_check_for_strays') for k in ['CEPHADM_STRAY_HOST', diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 34394ef3d13..74e31e85483 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1036,7 +1036,7 @@ class TestCephadm(object): assert cephadm_module.manage_etc_ceph_ceph_conf is False with with_host(cephadm_module, 'test'): - assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test') with with_host(cephadm_module, 'test'): cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True) @@ -1047,7 +1047,7 @@ class TestCephadm(object): _write_file.assert_called_with('test', '/etc/ceph/ceph.conf', b'', 0o644, 0, 0) - assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test') # set extra config and expect that we deploy another ceph.conf cephadm_module._set_extra_ceph_conf('[mon]\nk=v') @@ -1056,21 +1056,17 @@ class TestCephadm(object): b'\n\n[mon]\nk=v\n', 0o644, 0, 0) # reload - cephadm_module.cache.last_etc_ceph_ceph_conf = {} + cephadm_module.cache.last_client_files = {} cephadm_module.cache.load() - assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test') # Make sure, _check_daemons does a redeploy due to monmap change: - cephadm_module.mock_store_set('_ceph_get', 'mon_map', { - 'modified': datetime_to_str(datetime_now()), - 'fsid': 'foobar', - }) - cephadm_module.notify('mon_map', mock.MagicMock()) - assert cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') - cephadm_module.cache.last_etc_ceph_ceph_conf = {} - cephadm_module.cache.load() - assert cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + before_digest = cephadm_module.cache.get_host_client_files('test')['/etc/ceph/ceph.conf'][0] + cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2') + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + after_digest = cephadm_module.cache.get_host_client_files('test')['/etc/ceph/ceph.conf'][0] + assert before_digest != after_digest def test_etc_ceph_init(self): with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m: -- 2.39.5