git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: handle use_agent being turned on and off
author Adam King <adking@redhat.com>
Thu, 2 Sep 2021 17:29:32 +0000 (13:29 -0400)
committer Adam King <adking@redhat.com>
Fri, 24 Sep 2021 11:23:51 +0000 (07:23 -0400)
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py

index 9b16a4af7eb28b66e654556c2bf4e99d5be88c56..6671b02ad2abca25752f90f57fb14f3efbd33bb9 100644 (file)
@@ -350,7 +350,13 @@ class SSLCerts:
         return (cert_str, key_str)
 
     def get_root_cert(self) -> str:
-        return crypto.dump_certificate(crypto.FILETYPE_PEM, self.root_cert).decode('utf-8')
+        try:
+            return crypto.dump_certificate(crypto.FILETYPE_PEM, self.root_cert).decode('utf-8')
+        except AttributeError:
+            return ''
 
     def get_root_key(self) -> str:
-        return crypto.dump_privatekey(crypto.FILETYPE_PEM, self.root_key).decode('utf-8')
+        try:
+            return crypto.dump_certificate(crypto.FILETYPE_PEM, self.root_key).decode('utf-8')
+        except AttributeError:
+            return ''
index d393e7ad65be170f46aa168a432d13d9c8230612..09112b542fb7d88bfe996c1290d80feb8bdc4d03 100644 (file)
@@ -508,26 +508,17 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
 
         self.config_checker = CephadmConfigChecks(self)
 
-        self.cherrypy_thread = None
+        self.cherrypy_thread = CherryPyThread(self)
+        self.cherrypy_thread.start()
         self.agent_helpers = CephadmAgentHelpers(self)
-
         if self.use_agent:
-            try:
-                if not self.cherrypy_thread:
-                    self.cherrypy_thread = CherryPyThread(self)
-                    self.cherrypy_thread.start()
-                if 'agent' not in self.spec_store:
-                    self.agent_helpers._apply_agent()
-            except Exception as e:
-                self.log.error(f'Failed to initialize agent spec and cherrypy server: {e}')
+            self.agent_helpers._apply_agent()
 
     def shutdown(self) -> None:
         self.log.debug('shutdown')
         self._worker_pool.close()
         self._worker_pool.join()
-        if self.cherrypy_thread:
-            self.cherrypy_thread.shutdown()
-            self.cherrypy_thread = None
+        self.cherrypy_thread.shutdown()
         self.run = False
         self.event.set()
 
index 8f4f02e5e4b325ed51c13f3d1f041c07e277a014..7d45b57eae4d05f092f979f0da7765a26f552520 100644 (file)
@@ -322,7 +322,9 @@ class HostAssignment(object):
         existing = existing_active + existing_standby
 
         # build to_add
-        if not count:
+        if self.service_name == 'agent':
+            to_add = [dd for dd in others]
+        elif not count:
             to_add = [dd for dd in others if dd.hostname not in [
                 h.hostname for h in self.unreachable_hosts]]
         else:
index 50c29e2f43902896abdf58a2c783b77aae076fea..a952fd625c55d37f8116def6c749d92a1e485f7c 100644 (file)
@@ -15,7 +15,6 @@ from orchestrator import OrchestratorError, set_exception_subject, OrchestratorE
     DaemonDescriptionStatus, daemon_type_to_service
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.schedule import HostAssignment
-from cephadm.agent import CherryPyThread
 from cephadm.autotune import MemoryAutotuner
 from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
     CephadmNoImage, CEPH_TYPES, ContainerInspectInfo
@@ -93,17 +92,24 @@ class CephadmServe:
                     self._purge_deleted_services()
 
                     if self.mgr.use_agent:
-                        if not self.mgr.cherrypy_thread:
-                            self.mgr.cherrypy_thread = CherryPyThread(self.mgr)
-                            self.mgr.cherrypy_thread.start()
+                        # on the off chance there are still agents hanging around from
+                        # when we turned the config option off, we need to redeploy them
+                        # we can tell they're in that state if we don't have a keyring for
+                        # them in the host cache
+                        for agent in self.mgr.cache.get_daemons_by_service('agent'):
+                            if agent.hostname not in self.mgr.cache.agent_keys:
+                                self.mgr._schedule_daemon_action(agent.name(), 'redeploy')
                         if 'agent' not in self.mgr.spec_store:
                             self.mgr.agent_helpers._apply_agent()
+                        for host in self.mgr.cache.get_hosts():
+                            self.mgr.cache.metadata_up_to_date[host] = False
                     else:
-                        if self.mgr.cherrypy_thread:
-                            self.mgr.cherrypy_thread.shutdown()
-                            self.mgr.cherrypy_thread = None
                         if 'agent' in self.mgr.spec_store:
                             self.mgr.spec_store.rm('agent')
+                        self.mgr.cache.agent_counter = {}
+                        self.mgr.cache.agent_timestamp = {}
+                        self.mgr.cache.agent_keys = {}
+                        self.mgr.cache.agent_ports = {}
 
                     if self.mgr.upgrade.continue_upgrade():
                         continue
@@ -911,7 +917,7 @@ class CephadmServe:
             assert dd.hostname is not None
             assert dd.daemon_type is not None
             assert dd.daemon_id is not None
-            if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd', 'agent']:
+            if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
                 # (mon and mgr specs should always exist; osds aren't matched
                 # to a service spec)
                 self.log.info('Removing orphan daemon %s...' % dd.name())
index 43999e398a7e65ef894a10da23b589e699027d3f..df86156dcbc0c6df050ec66c82bfadc08b437561 100644 (file)
@@ -1026,8 +1026,12 @@ class CephadmAgent(CephService):
                'host': daemon_spec.host,
                'device_enhanced_scan': str(self.mgr.get_module_option('device_enhanced_scan'))}
 
-        assert self.mgr.cherrypy_thread
-        assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
+        try:
+            assert self.mgr.cherrypy_thread
+            assert self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
+        except Exception:
+            raise OrchestratorError(
+                'Cannot deploy agent daemons until cephadm endpoint has finished generating certs')
         listener_cert, listener_key = self.mgr.cherrypy_thread.ssl_certs.generate_cert(
             daemon_spec.host)
         config = {