]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: Deploying prometheus service for the first time, does not update PROMETH... 61824/head
authorShweta Bhosale <Shweta.Bhosale1@ibm.com>
Fri, 14 Feb 2025 09:19:49 +0000 (14:49 +0530)
committerShweta Bhosale <Shweta.Bhosale1@ibm.com>
Thu, 13 Mar 2025 08:09:22 +0000 (13:39 +0530)
Fixes: https://tracker.ceph.com/issues/69942
Signed-off-by: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_spec.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/tests/test_orchestrator.py

index 731cd2f31587c966607eee90951b5a0372e1ebfb..b559fae82faaf7a707e4751aa465f27a77c5f395 100644 (file)
@@ -43,7 +43,7 @@ from ceph.deployment.service_spec import (
     NvmeofServiceSpec,
 )
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
-from cephadm.serve import CephadmServe
+from cephadm.serve import CephadmServe, REQUIRES_POST_ACTIONS
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.http_server import CephadmHttpServer
 from cephadm.agent import CephadmAgentHelpers
@@ -624,7 +624,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
 
         self.template = TemplateMgr(self)
 
-        self.requires_post_actions: Set[str] = set()
         self.need_connect_dashboard_rgw = False
 
         self.config_checker = CephadmConfigChecks(self)
@@ -936,6 +935,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
                     'error': DaemonDescriptionStatus.error,
                     'unknown': DaemonDescriptionStatus.error,
                 }[d['state']]
+
             sd = orchestrator.DaemonDescription(
                 daemon_type=daemon_type,
                 daemon_id='.'.join(d['name'].split('.')[1:]),
@@ -966,6 +966,15 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
                 extra_container_args=d.get('extra_container_args'),
                 extra_entrypoint_args=d.get('extra_entrypoint_args'),
             )
+
+            if daemon_type in REQUIRES_POST_ACTIONS:
+                # If post action is required for daemon, then restore value of pending_daemon_config
+                try:
+                    cached_dd = self.cache.get_daemon(sd.name(), host)
+                    sd.update_pending_daemon_config(cached_dd.pending_daemon_config)
+                except orchestrator.OrchestratorError:
+                    pass
+
             dm[sd.name()] = sd
         self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
         self.cache.update_host_daemons(host, dm)
index 9eda4d182ee81a03bee6b07aa3fb45aa2b5b2d54..32141a19626b3e7b93e6ed35c54d97c55818974a 100644 (file)
@@ -1193,8 +1193,11 @@ class CephadmServe:
         for daemon_type, daemon_descs in daemons_post.items():
             run_post = False
             for d in daemon_descs:
-                if d.name() in self.mgr.requires_post_actions:
-                    self.mgr.requires_post_actions.remove(d.name())
+                if d.pending_daemon_config:
+                    assert d.hostname is not None
+                    cache_dd = self.mgr.cache.get_daemon(d.name(), d.hostname)
+                    cache_dd.update_pending_daemon_config(False)
+                    self.mgr.cache.save_host(d.hostname)
                     run_post = True
             if run_post:
                 service_registry.get_service(daemon_type_to_service(
@@ -1466,9 +1469,10 @@ class CephadmServe:
                         # just created
                         sd = daemon_spec.to_daemon_description(
                             DaemonDescriptionStatus.starting, 'starting')
-                        self.mgr.cache.add_daemon(daemon_spec.host, sd)
+                        # If daemon requires post action, then mark pending_daemon_config as true
                         if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS:
-                            self.mgr.requires_post_actions.add(daemon_spec.name())
+                            sd.update_pending_daemon_config(True)
+                        self.mgr.cache.add_daemon(daemon_spec.host, sd)
                     self.mgr.cache.invalidate_host_daemons(daemon_spec.host)
 
                 if daemon_spec.daemon_type != 'agent':
index 48e58d9fc78433b56ab06f0d24465fb1f66e7b1c..0f650bc39582e378086f05637c1dd19f05d1ff9e 100644 (file)
@@ -248,6 +248,7 @@ class TestCephadm(object):
                         'status_desc': 'starting',
                         'is_active': False,
                         'ports': [],
+                        'pending_daemon_config': False,
                     }
                 ]
 
@@ -900,10 +901,24 @@ class TestCephadm(object):
                     _mon_cmd.assert_any_call(
                         {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://host_fqdn:3000'},
                         None)
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, ServiceSpec(service_type='prometheus'), CephadmOrchestrator.apply_prometheus, 'test'):
+                with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+                    CephadmServe(cephadm_module)._check_daemons()
+                    _mon_cmd.assert_any_call(
+                        {'prefix': 'dashboard set-prometheus-api-host', 'value': 'http://host_fqdn:9095'},
+                        None)
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, ServiceSpec(service_type='alertmanager'), CephadmOrchestrator.apply_alertmanager, 'test'):
+                with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+                    CephadmServe(cephadm_module)._check_daemons()
+                    _mon_cmd.assert_any_call(
+                        {'prefix': 'dashboard set-alertmanager-api-host', 'value': 'http://host_fqdn:9093'},
+                        None)
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
-    def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
+    def test_iscsi_post_actions(self, cephadm_module: CephadmOrchestrator):
         # https://tracker.ceph.com/issues/52866
         with with_host(cephadm_module, 'test1'):
             with with_host(cephadm_module, 'test2'):
@@ -911,55 +926,10 @@ class TestCephadm(object):
 
                     CephadmServe(cephadm_module)._apply_all_services()
                     assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
-
-                    # get a daemons from postaction list (ARRGH sets!!)
-                    tempset = cephadm_module.requires_post_actions.copy()
-                    tempdaemon1 = tempset.pop()
-                    tempdaemon2 = tempset.pop()
-
-                    # make sure post actions has 2 daemons in it
-                    assert len(cephadm_module.requires_post_actions) == 2
-
-                    # replicate a host cache that is not in sync when check_daemons is called
-                    tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1)
-                    tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2)
-                    host = 'test1'
-                    if 'test1' not in tempdaemon1:
-                        host = 'test2'
-                    cephadm_module.cache.rm_daemon(host, tempdaemon1)
-
-                    # Make sure, _check_daemons does a redeploy due to monmap change:
-                    cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
-                        'modified': datetime_to_str(datetime_now()),
-                        'fsid': 'foobar',
-                    })
-                    cephadm_module.notify('mon_map', None)
-                    cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
-                        'modules': ['dashboard']
-                    })
-
-                    with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
-                        CephadmServe(cephadm_module)._check_daemons()
-                        _cfg_db.assert_called_once_with([tempdd2])
-
-                        # post actions still has the other daemon in it and will run next _check_daemons
-                        assert len(cephadm_module.requires_post_actions) == 1
-
-                        # post actions was missed for a daemon
-                        assert tempdaemon1 in cephadm_module.requires_post_actions
-
-                        # put the daemon back in the cache
-                        cephadm_module.cache.add_daemon(host, tempdd1)
-
-                        _cfg_db.reset_mock()
-                        # replicate serve loop running again
-                        CephadmServe(cephadm_module)._check_daemons()
-
-                        # post actions should have been called again
-                        _cfg_db.asset_called()
-
-                        # post actions is now empty
-                        assert len(cephadm_module.requires_post_actions) == 0
+                    for host in ['test1', 'test2']:
+                        d = cephadm_module.cache.daemons[host]
+                        for name, dd in d.items():
+                            assert dd.pending_daemon_config is True
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
     def test_mon_add(self, cephadm_module):
index 42e590945cd961df25a49c5f028b3eebdb7df5eb..668289cf05e508e5ab75bc61ff54e5272489daf2 100644 (file)
@@ -286,6 +286,7 @@ def test_dd_octopus(dd_json):
         del j['daemon_name']
         return j
 
+    dd_json.update({'pending_daemon_config': False})
     assert dd_json == convert_to_old_style_json(
         DaemonDescription.from_json(dd_json).to_json())
 
index 9d721dd9d83e27b04158b2c6e72452d754a5a576..8a431c9cbc8de8e526acbe97e55ed180a9168bda 100644 (file)
@@ -1179,6 +1179,7 @@ class DaemonDescription(object):
                  rank_generation: Optional[int] = None,
                  extra_container_args: Optional[GeneralArgList] = None,
                  extra_entrypoint_args: Optional[GeneralArgList] = None,
+                 pending_daemon_config: bool = False
                  ) -> None:
 
         #: Host is at the same granularity as InventoryHost
@@ -1253,6 +1254,7 @@ class DaemonDescription(object):
         if extra_entrypoint_args:
             self.extra_entrypoint_args = ArgumentSpec.from_general_args(
                 extra_entrypoint_args)
+        self.pending_daemon_config = pending_daemon_config
 
     def __setattr__(self, name: str, value: Any) -> None:
         if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'):
@@ -1374,6 +1376,9 @@ class DaemonDescription(object):
             return f'{daemon_type_to_service(self.daemon_type)}.{self.service_id()}'
         return daemon_type_to_service(self.daemon_type)
 
+    def update_pending_daemon_config(self, value: bool) -> None:
+        self.pending_daemon_config = value
+
     def __repr__(self) -> str:
         return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
                                                          id=self.daemon_id)
@@ -1407,6 +1412,7 @@ class DaemonDescription(object):
         out['rank'] = self.rank
         out['rank_generation'] = self.rank_generation
         out['systemd_unit'] = self.systemd_unit
+        out['pending_daemon_config'] = self.pending_daemon_config
 
         for k in ['last_refresh', 'created', 'started', 'last_deployed',
                   'last_configured']:
@@ -1444,6 +1450,7 @@ class DaemonDescription(object):
         out['ports'] = self.ports
         out['ip'] = self.ip
         out['systemd_unit'] = self.systemd_unit
+        out['pending_daemon_config'] = self.pending_daemon_config
 
         for k in ['last_refresh', 'created', 'started', 'last_deployed',
                   'last_configured']:
index 3247b06a3993bd9ebb0a6c8de3b2f0a3869580c1..079e8d7f84bbd72048277cd27a5d1d290dc58aac 100644 (file)
@@ -92,6 +92,7 @@ hostname: ubuntu
 status: 1
 status_desc: starting
 is_active: false
+pending_daemon_config: false
 events:
 - 2020-06-10T10:08:22.933241Z daemon:crash.ubuntu [INFO] "Deployed crash.ubuntu on
   host 'ubuntu'"