From ab43d6d0c6e696fb365694102e2a26d7ae8cba68 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Juan=20Miguel=20Olmo=20Mart=C3=ADnez?= Date: Wed, 25 Nov 2020 15:40:57 +0100 Subject: [PATCH] mgr/cephadm: Get host facts data in host cache MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Include cephadm gather-facts information in host cache Signed-off-by: Juan Miguel Olmo Martínez --- src/cephadm/cephadm | 18 +++++++++++++++++- src/pybind/mgr/cephadm/inventory.py | 22 ++++++++++++++++++++++ src/pybind/mgr/cephadm/module.py | 7 +++++++ src/pybind/mgr/cephadm/serve.py | 21 +++++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 8f7dde093c7b3..b0a29cf8535d5 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -5436,7 +5436,6 @@ class HostFacts(): up_secs, _ = raw_time.split() return float(up_secs) - @property def kernel_security(self): # type: () -> Dict[str, str] """Determine the security features enabled in the kernel - SELinux, AppArmor""" @@ -5501,6 +5500,23 @@ class HostFacts(): "description": "Linux Security Module framework is not available" } + @property + def kernel_parameters(self): + # type: () -> Dict[str, str] + """Get kernel parameters required/used in Ceph clusters""" + + k_param = {} + out, _, _ = call_throws(['sysctl', '-a']) + if out: + param_list = out.split('\n') + param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list} + + # return only desired parameters + if 'net.ipv4.ip_nonlocal_bind' in param_dict: + k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind'] + + return k_param + def dump(self): # type: () -> str """Return the attributes of this HostFacts object as json""" diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index eb0c42faf96e8..a4ad1e26f86c8 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -204,6 +204,8 @@ class HostCache(): self.daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]] self.last_daemon_update = {} # type: Dict[str, datetime.datetime] self.devices = {} # type: Dict[str, List[inventory.Device]] + self.facts = {} # type: Dict[str, Dict[str, Any]] + self.last_facts_update = {} # type: Dict[str, datetime.datetime] self.osdspec_previews = {} # type: Dict[str, List[Dict[str, Any]]] self.networks = {} # type: Dict[str, Dict[str, List[str]]] self.last_device_update = {} # type: Dict[str, datetime.datetime] @@ -278,6 +280,11 @@ class HostCache(): self.daemons[host] = dm self.last_daemon_update[host] = datetime.datetime.utcnow() + def update_host_facts(self, host, facts): + # type: (str, Dict[str, Dict[str, Any]]) -> None + self.facts[host] = facts + self.last_facts_update[host] = datetime.datetime.utcnow() + def update_host_devices_networks(self, host, dls, nets): # type: (str, List[inventory.Device], Dict[str,List[str]]) -> None self.devices[host] = dls @@ -366,6 +373,10 @@ class HostCache(): del self.daemons[host] if host in self.devices: del self.devices[host] + if host in self.facts: + del self.facts[host] + if host in self.last_facts_update: + del self.last_facts_update[host] if host in self.osdspec_previews: del self.osdspec_previews[host] if host in self.loading_osdspec_preview: @@ -463,6 +474,17 @@ class HostCache(): return True return False + def host_needs_facts_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping gather facts refresh') + return False + cutoff = datetime.datetime.utcnow() - datetime.timedelta( + seconds=self.mgr.facts_cache_timeout) + if host not in self.last_facts_update or self.last_facts_update[host] < cutoff: + return True + return False + def host_had_daemon_refresh(self, host: str) -> bool: """ ... at least once. diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 3a923738a5b00..1776399358213 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -142,6 +142,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'default': 10 * 60, 'desc': 'seconds to cache service (daemon) inventory', }, + { + 'name': 'facts_cache_timeout', + 'type': 'secs', + 'default': 1 * 60, + 'desc': 'seconds to cache host facts data', + }, { 'name': 'host_check_interval', 'type': 'secs', @@ -293,6 +299,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.ssh_config_file = None # type: Optional[str] self.device_cache_timeout = 0 self.daemon_cache_timeout = 0 + self.facts_cache_timeout = 0 self.host_check_interval = 0 self.mode = '' self.container_image_base = '' diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 502ef8f48dd75..99cfa79e36908 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -110,6 +110,7 @@ class CephadmServe: @forall_hosts def refresh(host): + if self.mgr.cache.host_needs_check(host): r = self._check_host(host) if r is not None: @@ -133,6 +134,12 @@ class CephadmServe: if r: failures.append(r) + if self.mgr.cache.host_needs_facts_refresh(host): + self.log.info(('refreshing %s facts' % host)) + r = self._refresh_facts(host) + if r: + failures.append(r) + if self.mgr.cache.host_needs_osdspec_preview_refresh(host): self.log.debug(f"refreshing OSDSpec previews for {host}") r = self._refresh_host_osdspec_previews(host) @@ -246,6 +253,20 @@ class CephadmServe: self.mgr.cache.save_host(host) return None + def _refresh_facts(self, host): + try: + out, err, code = self.mgr._run_cephadm( + host, cephadmNoImage, 'gather-facts', [], + error_ok=True, no_fsid=True) + + if code: + return 'host %s gather-facts returned %d: %s' % ( + host, code, err) + except Exception as e: + return 'host %s gather facts failed: %s' % (host, e) + self.log.debug('Refreshed host %s facts' % (host)) + self.mgr.cache.update_host_facts(host, json.loads(''.join(out))) + def _refresh_host_devices(self, host) -> Optional[str]: try: out, err, code = self.mgr._run_cephadm( -- 2.39.5