From c8b41564cde38b0aed9c1374837062b982cd8e39 Mon Sep 17 00:00:00 2001 From: Adam King Date: Thu, 2 Sep 2021 13:29:32 -0400 Subject: [PATCH] mgr/cephadm: handle use_agent being turned on and off Signed-off-by: Adam King --- src/pybind/mgr/cephadm/agent.py | 10 +++++++-- src/pybind/mgr/cephadm/module.py | 17 ++++---------- src/pybind/mgr/cephadm/schedule.py | 4 +++- src/pybind/mgr/cephadm/serve.py | 22 ++++++++++++------- .../mgr/cephadm/services/cephadmservice.py | 8 +++++-- 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index 9b16a4af7eb..6671b02ad2a 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -350,7 +350,13 @@ class SSLCerts: return (cert_str, key_str) def get_root_cert(self) -> str: - return crypto.dump_certificate(crypto.FILETYPE_PEM, self.root_cert).decode('utf-8') + try: + return crypto.dump_certificate(crypto.FILETYPE_PEM, self.root_cert).decode('utf-8') + except AttributeError: + return '' def get_root_key(self) -> str: - return crypto.dump_privatekey(crypto.FILETYPE_PEM, self.root_key).decode('utf-8') + try: + return crypto.dump_privatekey(crypto.FILETYPE_PEM, self.root_key).decode('utf-8') + except AttributeError: + return '' diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index d393e7ad65b..09112b542fb 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -508,26 +508,17 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.config_checker = CephadmConfigChecks(self) - self.cherrypy_thread = None + self.cherrypy_thread = CherryPyThread(self) + self.cherrypy_thread.start() self.agent_helpers = CephadmAgentHelpers(self) - if self.use_agent: - try: - if not self.cherrypy_thread: - self.cherrypy_thread = CherryPyThread(self) - self.cherrypy_thread.start() - if 'agent' not in self.spec_store: - self.agent_helpers._apply_agent() - except Exception as e: 
- self.log.error(f'Failed to initialize agent spec and cherrypy server: {e}') + self.agent_helpers._apply_agent() def shutdown(self) -> None: self.log.debug('shutdown') self._worker_pool.close() self._worker_pool.join() - if self.cherrypy_thread: - self.cherrypy_thread.shutdown() - self.cherrypy_thread = None + self.cherrypy_thread.shutdown() self.run = False self.event.set() diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 8f4f02e5e4b..7d45b57eae4 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -322,7 +322,9 @@ class HostAssignment(object): existing = existing_active + existing_standby # build to_add - if not count: + if self.service_name == 'agent': + to_add = [dd for dd in others] + elif not count: to_add = [dd for dd in others if dd.hostname not in [ h.hostname for h in self.unreachable_hosts]] else: diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 50c29e2f439..a952fd625c5 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -15,7 +15,6 @@ from orchestrator import OrchestratorError, set_exception_subject, OrchestratorE DaemonDescriptionStatus, daemon_type_to_service from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.schedule import HostAssignment -from cephadm.agent import CherryPyThread from cephadm.autotune import MemoryAutotuner from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \ CephadmNoImage, CEPH_TYPES, ContainerInspectInfo @@ -93,17 +92,24 @@ class CephadmServe: self._purge_deleted_services() if self.mgr.use_agent: - if not self.mgr.cherrypy_thread: - self.mgr.cherrypy_thread = CherryPyThread(self.mgr) - self.mgr.cherrypy_thread.start() + # on the off chance there are still agents hanging around from + # when we turned the config option off, we need to redeploy them + # we can tell they're in that state if we don't have a keyring for + # them in the 
host cache + for agent in self.mgr.cache.get_daemons_by_service('agent'): + if agent.hostname not in self.mgr.cache.agent_keys: + self.mgr._schedule_daemon_action(agent.name(), 'redeploy') if 'agent' not in self.mgr.spec_store: self.mgr.agent_helpers._apply_agent() + for host in self.mgr.cache.get_hosts(): + self.mgr.cache.metadata_up_to_date[host] = False else: - if self.mgr.cherrypy_thread: - self.mgr.cherrypy_thread.shutdown() - self.mgr.cherrypy_thread = None if 'agent' in self.mgr.spec_store: self.mgr.spec_store.rm('agent') + self.mgr.cache.agent_counter = {} + self.mgr.cache.agent_timestamp = {} + self.mgr.cache.agent_keys = {} + self.mgr.cache.agent_ports = {} if self.mgr.upgrade.continue_upgrade(): continue @@ -911,7 +917,7 @@ class CephadmServe: assert dd.hostname is not None assert dd.daemon_type is not None assert dd.daemon_id is not None - if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd', 'agent']: + if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']: # (mon and mgr specs should always exist; osds aren't matched # to a service spec) self.log.info('Removing orphan daemon %s...' 
% dd.name()) diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 43999e398a7..df86156dcbc 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1026,8 +1026,12 @@ class CephadmAgent(CephService): 'host': daemon_spec.host, 'device_enhanced_scan': str(self.mgr.get_module_option('device_enhanced_scan'))} - assert self.mgr.cherrypy_thread - assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert() + try: + assert self.mgr.cherrypy_thread + assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert() + except Exception: + raise OrchestratorError( + 'Cannot deploy agent daemons until cephadm endpoint has finished generating certs') listener_cert, listener_key = self.mgr.cherrypy_thread.ssl_certs.generate_cert( daemon_spec.host) config = { -- 2.47.3