From: Guillaume Abrioux Date: Fri, 1 Dec 2023 08:03:58 +0000 (+0000) Subject: cephadm: gracefully shutdown the agent prior to removing X-Git-Tag: v19.3.0~102^2~23 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e9a2450f897aca61181522bf64b215d5295154b0;p=ceph.git cephadm: gracefully shutdown the agent prior to removing When the agent is removed, the daemon is abruptly stopped. Since the node-proxy logic runs from within the cephadm agent, this leaves an active Redfish session open. The idea is to gracefully shut down the agent so node-proxy can catch that event and make sure it closes the currently active Redfish session prior to shutting down. Signed-off-by: Guillaume Abrioux --- diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index aa5ae7570531..e7193ccc29b6 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -1280,12 +1280,13 @@ class MgrListener(Thread): conn.send(err_str.encode()) logger.error(err_str) else: - conn.send(b'ACK') - if 'config' in data: - self.agent.wakeup() - self.agent.ls_gatherer.wakeup() - self.agent.volume_gatherer.wakeup() - logger.debug(f'Got mgr message {data}') + if 'counter' in data: + conn.send(b'ACK') + if 'config' in data: + self.agent.wakeup() + self.agent.ls_gatherer.wakeup() + self.agent.volume_gatherer.wakeup() + logger.debug(f'Got mgr message {data}') except Exception as e: logger.error(f'Mgr Listener encountered exception: {e}') @@ -1293,17 +1294,23 @@ class MgrListener(Thread): self.stop = True def handle_json_payload(self, data: Dict[Any, Any]) -> None: - self.agent.ack = int(data['counter']) - if 'config' in data: - logger.info('Received new config from mgr') - config = data['config'] - for filename in config: - if filename in self.agent.required_files: - file_path = os.path.join(self.agent.daemon_dir, filename) - with write_new(file_path) as f: - f.write(config[filename]) - self.agent.pull_conf_settings() - self.agent.wakeup() + if 'counter' in data: + self.agent.ack = 
int(data['counter']) + if 'config' in data: + logger.info('Received new config from mgr') + config = data['config'] + for filename in config: + if filename in self.agent.required_files: + file_path = os.path.join(self.agent.daemon_dir, filename) + with write_new(file_path) as f: + f.write(config[filename]) + self.agent.pull_conf_settings() + self.agent.wakeup() + elif 'node_proxy_shutdown' in data: + self.agent.shutdown() + else: + raise RuntimeError('No valid data received.') + @register_daemon_form diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index d5f7d3161cf9..03266a6c7a75 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -896,6 +896,16 @@ class CephadmAgentHelpers: host, self.mgr.agent_cache.agent_ports[host], payload, self.mgr, daemon_spec) message_thread.start() + def _shutdown_node_proxy(self) -> None: + hosts = set([h for h in self.mgr.cache.get_hosts() if + (h in self.mgr.agent_cache.agent_ports and not self.mgr.agent_cache.messaging_agent(h))]) + + for host in hosts: + payload: Dict[str, Any] = {'node_proxy_shutdown': host} + message_thread = AgentMessageThread( + host, self.mgr.agent_cache.agent_ports[host], payload, self.mgr) + message_thread.start() + def _request_ack_all_not_up_to_date(self) -> None: self.mgr.agent_helpers._request_agent_acks( set([h for h in self.mgr.cache.get_hosts() if @@ -971,10 +981,11 @@ class CephadmAgentHelpers: if 'agent' in self.mgr.spec_store: self.mgr.spec_store.rm('agent') need_apply = True - self.mgr.agent_cache.agent_counter = {} - self.mgr.agent_cache.agent_timestamp = {} - self.mgr.agent_cache.agent_keys = {} - self.mgr.agent_cache.agent_ports = {} + if not self.mgr.cache.get_daemons_by_service('agent'): + self.mgr.agent_cache.agent_counter = {} + self.mgr.agent_cache.agent_timestamp = {} + self.mgr.agent_cache.agent_keys = {} + self.mgr.agent_cache.agent_ports = {} return need_apply def _check_agent(self, host: str) -> bool: diff --git 
a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index f1d405edda0b..1681be003487 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1236,6 +1236,16 @@ class CephadmAgent(CephService): return daemon_spec + def pre_remove(self, daemon: DaemonDescription) -> None: + super().pre_remove(daemon) + + assert daemon.daemon_id is not None + daemon_id: str = daemon.daemon_id + + logger.info('Removing agent %s...' % daemon_id) + + self.mgr.agent_helpers._shutdown_node_proxy() + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: agent = self.mgr.http_server.agent try: