From f93da5a2fd8d516b698bd1f070986d2711e1c379 Mon Sep 17 00:00:00 2001 From: Shweta Bhosale Date: Fri, 14 Feb 2025 14:49:49 +0530 Subject: [PATCH] mgr/cephadm: Deploying prometheus service for the first time, does not update PROMETHEUS_API_HOST url under mgr module Fixes: https://tracker.ceph.com/issues/69942 Signed-off-by: Shweta Bhosale --- src/pybind/mgr/cephadm/module.py | 13 +++- src/pybind/mgr/cephadm/serve.py | 12 ++-- src/pybind/mgr/cephadm/tests/test_cephadm.py | 70 ++++++------------- src/pybind/mgr/cephadm/tests/test_spec.py | 1 + src/pybind/mgr/orchestrator/_interface.py | 7 ++ .../orchestrator/tests/test_orchestrator.py | 1 + 6 files changed, 48 insertions(+), 56 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 731cd2f31587c..b559fae82faaf 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -43,7 +43,7 @@ from ceph.deployment.service_spec import ( NvmeofServiceSpec, ) from ceph.utils import str_to_datetime, datetime_to_str, datetime_now -from cephadm.serve import CephadmServe +from cephadm.serve import CephadmServe, REQUIRES_POST_ACTIONS from cephadm.services.cephadmservice import CephadmDaemonDeploySpec from cephadm.http_server import CephadmHttpServer from cephadm.agent import CephadmAgentHelpers @@ -624,7 +624,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.template = TemplateMgr(self) - self.requires_post_actions: Set[str] = set() self.need_connect_dashboard_rgw = False self.config_checker = CephadmConfigChecks(self) @@ -936,6 +935,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'error': DaemonDescriptionStatus.error, 'unknown': DaemonDescriptionStatus.error, }[d['state']] + sd = orchestrator.DaemonDescription( daemon_type=daemon_type, daemon_id='.'.join(d['name'].split('.')[1:]), @@ -966,6 +966,15 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, extra_container_args=d.get('extra_container_args'), extra_entrypoint_args=d.get('extra_entrypoint_args'), ) + + if daemon_type in REQUIRES_POST_ACTIONS: + # If post action is required for daemon, then restore value of pending_daemon_config + try: + cached_dd = self.cache.get_daemon(sd.name(), host) + sd.update_pending_daemon_config(cached_dd.pending_daemon_config) + except orchestrator.OrchestratorError: + pass + dm[sd.name()] = sd self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm))) self.cache.update_host_daemons(host, dm) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 9eda4d182ee81..32141a19626b3 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1193,8 +1193,11 @@ class CephadmServe: for daemon_type, daemon_descs in daemons_post.items(): run_post = False for d in daemon_descs: - if d.name() in self.mgr.requires_post_actions: - self.mgr.requires_post_actions.remove(d.name()) + if d.pending_daemon_config: + assert d.hostname is not None + cache_dd = self.mgr.cache.get_daemon(d.name(), d.hostname) + cache_dd.update_pending_daemon_config(False) + self.mgr.cache.save_host(d.hostname) run_post = True if run_post: service_registry.get_service(daemon_type_to_service( @@ -1466,9 +1469,10 @@ class CephadmServe: # just created sd = daemon_spec.to_daemon_description( DaemonDescriptionStatus.starting, 'starting') - self.mgr.cache.add_daemon(daemon_spec.host, sd) + # If daemon requires post action, then mark pending_daemon_config as true if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS: - self.mgr.requires_post_actions.add(daemon_spec.name()) + sd.update_pending_daemon_config(True) + self.mgr.cache.add_daemon(daemon_spec.host, sd) self.mgr.cache.invalidate_host_daemons(daemon_spec.host) if daemon_spec.daemon_type != 'agent': diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 48e58d9fc7843..0f650bc39582e 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -248,6 +248,7 @@ class TestCephadm(object): 'status_desc': 'starting', 'is_active': False, 'ports': [], + 'pending_daemon_config': False, } ] @@ -900,10 +901,24 @@ class TestCephadm(object): _mon_cmd.assert_any_call( {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://host_fqdn:3000'}, None) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='prometheus'), CephadmOrchestrator.apply_prometheus, 'test'): + with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + CephadmServe(cephadm_module)._check_daemons() + _mon_cmd.assert_any_call( + {'prefix': 'dashboard set-prometheus-api-host', 'value': 'http://host_fqdn:9095'}, + None) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='alertmanager'), CephadmOrchestrator.apply_alertmanager, 'test'): + with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd: + CephadmServe(cephadm_module)._check_daemons() + _mon_cmd.assert_any_call( + {'prefix': 'dashboard set-alertmanager-api-host', 'value': 'http://host_fqdn:9093'}, + None) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4') - def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator): + def test_iscsi_post_actions(self, cephadm_module: CephadmOrchestrator): # https://tracker.ceph.com/issues/52866 with with_host(cephadm_module, 'test1'): with with_host(cephadm_module, 'test2'): @@ -911,55 +926,10 @@ class TestCephadm(object): CephadmServe(cephadm_module)._apply_all_services() assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2 - - # get a daemons from postaction list (ARRGH sets!!) - tempset = cephadm_module.requires_post_actions.copy() - tempdaemon1 = tempset.pop() - tempdaemon2 = tempset.pop() - - # make sure post actions has 2 daemons in it - assert len(cephadm_module.requires_post_actions) == 2 - - # replicate a host cache that is not in sync when check_daemons is called - tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1) - tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2) - host = 'test1' - if 'test1' not in tempdaemon1: - host = 'test2' - cephadm_module.cache.rm_daemon(host, tempdaemon1) - - # Make sure, _check_daemons does a redeploy due to monmap change: - cephadm_module.mock_store_set('_ceph_get', 'mon_map', { - 'modified': datetime_to_str(datetime_now()), - 'fsid': 'foobar', - }) - cephadm_module.notify('mon_map', None) - cephadm_module.mock_store_set('_ceph_get', 'mgr_map', { - 'modules': ['dashboard'] - }) - - with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db: - CephadmServe(cephadm_module)._check_daemons() - _cfg_db.assert_called_once_with([tempdd2]) - - # post actions still has the other daemon in it and will run next _check_daemons - assert len(cephadm_module.requires_post_actions) == 1 - - # post actions was missed for a daemon - assert tempdaemon1 in cephadm_module.requires_post_actions - - # put the daemon back in the cache - cephadm_module.cache.add_daemon(host, tempdd1) - - _cfg_db.reset_mock() - # replicate serve loop running again - CephadmServe(cephadm_module)._check_daemons() - - # post actions should have been called again - _cfg_db.asset_called() - - # post actions is now empty - assert len(cephadm_module.requires_post_actions) == 0 + for host in ['test1', 'test2']: + d = cephadm_module.cache.daemons[host] + for name, dd in d.items(): + assert dd.pending_daemon_config is True @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) def test_mon_add(self, cephadm_module): diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py index 42e590945cd96..668289cf05e50 100644 --- a/src/pybind/mgr/cephadm/tests/test_spec.py +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -286,6 +286,7 @@ def test_dd_octopus(dd_json): del j['daemon_name'] return j + dd_json.update({'pending_daemon_config': False}) assert dd_json == convert_to_old_style_json( DaemonDescription.from_json(dd_json).to_json()) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 9d721dd9d83e2..8a431c9cbc8de 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -1179,6 +1179,7 @@ class DaemonDescription(object): rank_generation: Optional[int] = None, extra_container_args: Optional[GeneralArgList] = None, extra_entrypoint_args: Optional[GeneralArgList] = None, + pending_daemon_config: bool = False ) -> None: #: Host is at the same granularity as InventoryHost @@ -1253,6 +1254,7 @@ class DaemonDescription(object): if extra_entrypoint_args: self.extra_entrypoint_args = ArgumentSpec.from_general_args( extra_entrypoint_args) + self.pending_daemon_config = pending_daemon_config def __setattr__(self, name: str, value: Any) -> None: if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'): @@ -1374,6 +1376,9 @@ class DaemonDescription(object): return f'{daemon_type_to_service(self.daemon_type)}.{self.service_id()}' return daemon_type_to_service(self.daemon_type) + def update_pending_daemon_config(self, value: bool) -> None: + self.pending_daemon_config = value + def __repr__(self) -> str: return "({type}.{id})".format(type=self.daemon_type, id=self.daemon_id) @@ -1407,6 +1412,7 @@ class DaemonDescription(object): out['rank'] = self.rank out['rank_generation'] = self.rank_generation out['systemd_unit'] = self.systemd_unit + out['pending_daemon_config'] = self.pending_daemon_config for k in ['last_refresh', 'created', 'started', 'last_deployed', 'last_configured']: @@ -1444,6 +1450,7 @@ class DaemonDescription(object): out['ports'] = self.ports out['ip'] = self.ip out['systemd_unit'] = self.systemd_unit + out['pending_daemon_config'] = self.pending_daemon_config for k in ['last_refresh', 'created', 'started', 'last_deployed', 'last_configured']: diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index 3247b06a3993b..079e8d7f84bbd 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -92,6 +92,7 @@ hostname: ubuntu status: 1 status_desc: starting is_active: false +pending_daemon_config: false events: - 2020-06-10T10:08:22.933241Z daemon:crash.ubuntu [INFO] "Deployed crash.ubuntu on host 'ubuntu'" -- 2.39.5