]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: re-factoring the dependencies calculation code
authorRedouane Kachach <rkachach@ibm.com>
Tue, 14 Jan 2025 09:34:27 +0000 (10:34 +0100)
committerRedouane Kachach <rkachach@ibm.com>
Tue, 14 Jan 2025 09:34:27 +0000 (10:34 +0100)
currently, the dependency logic is duplicated between the different Service
classes and the module.py::_calc_daemon_deps function, which can lead
to issues such as BUGSs, difficulty in maintenance, and other problems
associated with duplicated code. In this change, we are consolidating
all the dependency logic into the Service subclasses to eliminate this
duplication. This way, we also force anybody creating a new Service to
think about its potential dependencies.

Fixes: https://tracker.ceph.com/issues/69021
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/services/ingress.py
src/pybind/mgr/cephadm/services/iscsi.py
src/pybind/mgr/cephadm/services/jaeger.py
src/pybind/mgr/cephadm/services/mgmt_gateway.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/services/node_proxy.py
src/pybind/mgr/cephadm/tests/test_services.py

index 550604fc55be978a75763af25bc0bae012a283f3..d67c5a14abb0f817bdb4fa07545772e4123c449a 100644 (file)
@@ -1338,11 +1338,16 @@ class HostCache():
 
     def get_daemons_by_type(self, service_type: str, host: str = '') -> List[orchestrator.DaemonDescription]:
         assert service_type not in ['keepalived', 'haproxy']
-
         daemons = self.daemons[host].values() if host else self._get_daemons()
-
         return [d for d in daemons if d.daemon_type in service_to_daemon_types(service_type)]
 
+    def get_daemons_by_types(self, daemon_types: List[str]) -> List[str]:
+        daemon_names = []
+        for daemon_type in daemon_types:
+            for dd in self.get_daemons_by_type(daemon_type):
+                daemon_names.append(dd.name())
+        return daemon_names
+
     def get_daemon_types(self, hostname: str) -> Set[str]:
         """Provide a list of the types of daemons on the host"""
         return cast(Set[str], {d.daemon_type for d in self.daemons[hostname].values()})
index 6690153d435af4473a02a116bd3da3f919c35bd0..afa715bb9125f86af9e897d300dad496ae722fde 100644 (file)
@@ -36,7 +36,7 @@ from ceph.deployment.drive_group import DriveGroupSpec
 from ceph.deployment.service_spec import \
     ServiceSpec, PlacementSpec, \
     HostPlacementSpec, IngressSpec, \
-    TunedProfileSpec, IscsiServiceSpec, \
+    TunedProfileSpec, \
     MgmtGatewaySpec
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from cephadm.serve import CephadmServe
@@ -2931,124 +2931,9 @@ Then run the following:
                           spec: Optional[ServiceSpec],
                           daemon_type: str,
                           daemon_id: str) -> List[str]:
-
-        def get_daemon_names(daemons: List[str]) -> List[str]:
-            daemon_names = []
-            for daemon_type in daemons:
-                for dd in self.cache.get_daemons_by_type(daemon_type):
-                    daemon_names.append(dd.name())
-            return daemon_names
-
-        prom_cred_hash = None
-        alertmgr_cred_hash = None
-        security_enabled, mgmt_gw_enabled, _ = self._get_security_config()
-        if security_enabled:
-            alertmanager_user, alertmanager_password = self._get_alertmanager_credentials()
-            prometheus_user, prometheus_password = self._get_prometheus_credentials()
-            if prometheus_user and prometheus_password:
-                prom_cred_hash = f'{utils.md5_hash(prometheus_user + prometheus_password)}'
-            if alertmanager_user and alertmanager_password:
-                alertmgr_cred_hash = f'{utils.md5_hash(alertmanager_user + alertmanager_password)}'
-
-        deps = []
-        if daemon_type == 'haproxy':
-            # because cephadm creates new daemon instances whenever
-            # port or ip changes, identifying daemons by name is
-            # sufficient to detect changes.
-            if not spec:
-                return []
-            ingress_spec = cast(IngressSpec, spec)
-            assert ingress_spec.backend_service
-            daemons = self.cache.get_daemons_by_service(ingress_spec.backend_service)
-            deps = [d.name() for d in daemons]
-        elif daemon_type == 'keepalived':
-            # because cephadm creates new daemon instances whenever
-            # port or ip changes, identifying daemons by name is
-            # sufficient to detect changes.
-            if not spec:
-                return []
-            daemons = self.cache.get_daemons_by_service(spec.service_name())
-            deps = [d.name() for d in daemons if d.daemon_type == 'haproxy']
-        elif daemon_type == 'agent':
-            root_cert = ''
-            server_port = ''
-            try:
-                server_port = str(self.http_server.agent.server_port)
-                root_cert = self.cert_mgr.get_root_ca()
-            except Exception:
-                pass
-            deps = sorted([self.get_mgr_ip(), server_port, root_cert,
-                           str(self.device_enhanced_scan)])
-        elif daemon_type == 'node-proxy':
-            root_cert = ''
-            server_port = ''
-            try:
-                server_port = str(self.http_server.agent.server_port)
-                root_cert = self.cert_mgr.get_root_ca()
-            except Exception:
-                pass
-            deps = sorted([self.get_mgr_ip(), server_port, root_cert])
-        elif daemon_type == 'iscsi':
-            if spec:
-                iscsi_spec = cast(IscsiServiceSpec, spec)
-                deps = [self.iscsi_service.get_trusted_ips(iscsi_spec)]
-            else:
-                deps = [self.get_mgr_ip()]
-        elif daemon_type == 'prometheus':
-            if not mgmt_gw_enabled:
-                # for prometheus we add the active mgr as an explicit dependency,
-                # this way we force a redeploy after a mgr failover
-                deps.append(self.get_active_mgr().name())
-            deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283)))
-            deps.append(str(self.service_discovery_port))
-            # prometheus yaml configuration file (generated by prometheus.yml.j2) contains
-            # a scrape_configs section for each service type. This should be included only
-            # when at least one daemon of the corresponding service is running. Therefore,
-            # an explicit dependency is added for each service-type to force a reconfig
-            # whenever the number of daemons for those service-type changes from 0 to greater
-            # than zero and vice versa.
-            deps += [s for s in ['node-exporter', 'alertmanager']
-                     if self.cache.get_daemons_by_service(s)]
-            if len(self.cache.get_daemons_by_type('ingress')) > 0:
-                deps.append('ingress')
-            # add dependency on ceph-exporter daemons
-            deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
-            deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')]
-            deps += [d.name() for d in self.cache.get_daemons_by_service('oauth2-proxy')]
-            if prom_cred_hash is not None:
-                deps.append(prom_cred_hash)
-            if alertmgr_cred_hash is not None:
-                deps.append(alertmgr_cred_hash)
-        elif daemon_type == 'grafana':
-            deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway', 'oauth2-proxy'])
-            if prom_cred_hash is not None:
-                deps.append(prom_cred_hash)
-        elif daemon_type == 'alertmanager':
-            deps += get_daemon_names(['alertmanager', 'snmp-gateway', 'mgmt-gateway', 'oauth2-proxy'])
-            if not mgmt_gw_enabled:
-                deps += get_daemon_names(['mgr'])
-            if alertmgr_cred_hash is not None:
-                deps.append(alertmgr_cred_hash)
-        elif daemon_type == 'promtail':
-            deps += get_daemon_names(['loki'])
-        elif daemon_type in ['ceph-exporter', 'node-exporter']:
-            deps += get_daemon_names(['mgmt-gateway'])
-        elif daemon_type == JaegerAgentService.TYPE:
-            for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
-                assert dd.hostname is not None
-                port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
-                deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
-            deps = sorted(deps)
-        elif daemon_type == 'mgmt-gateway':
-            deps = MgmtGatewayService.get_dependencies(self)
-        else:
-            # this daemon type doesn't need deps mgmt
-            pass
-
-        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana',
-                           'ceph-exporter']:
-            deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
-
+        svc_type = daemon_type_to_service(daemon_type)
+        svc_cls = self.cephadm_services.get(svc_type, None)
+        deps = svc_cls.get_dependencies(self, spec, daemon_type) if svc_cls else []
         return sorted(deps)
 
     @forall_hosts
index 8e9cd00fa813675f21478ce2ce29202c8683576e..87b3a1df85155b6551082354b31ac97dc1d0bbef 100644 (file)
@@ -1129,8 +1129,8 @@ class CephadmServe:
                     dd.name()))
                 action = 'reconfig'
             elif last_deps != deps:
-                self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}')
-                self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...')
+                sym_diff = set(deps).symmetric_difference(last_deps)
+                self.log.info(f'Reconfiguring {dd.name()} deps {last_deps} -> {deps} (diff {sym_diff})')
                 action = 'reconfig'
                 # we need only redeploy if secure_monitoring_stack or mgmt-gateway value has changed:
                 # TODO(redo): check if we should just go always with redeploy (it's fast enough)
index 4f83d7bb0fb52e844e711ae045a4897922e5ecd3..607e1bd9019e6d770b2ec728fce73d238eb9cc99 100644 (file)
@@ -265,6 +265,12 @@ class CephadmService(metaclass=ABCMeta):
     def TYPE(self) -> str:
         pass
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        return []
+
     def __init__(self, mgr: "CephadmOrchestrator"):
         self.mgr: "CephadmOrchestrator" = mgr
 
@@ -576,6 +582,7 @@ class CephadmService(metaclass=ABCMeta):
 
 
 class CephService(CephadmService):
+
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         # Ceph.daemons (mon, mgr, mds, osd, etc)
         cephadm_config = self.get_config_and_keyring(
@@ -1301,6 +1308,15 @@ class CephExporterService(CephService):
     TYPE = 'ceph-exporter'
     DEFAULT_SERVICE_PORT = 9926
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+
+        deps = [f'secure_monitoring_stack:{mgr.secure_monitoring_stack}']
+        deps += mgr.cache.get_daemons_by_types(['mgmt-gateway'])
+        return sorted(deps)
+
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         spec = cast(CephExporterSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
@@ -1330,11 +1346,7 @@ class CephExporterService(CephService):
         daemon_spec.keyring = keyring
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
         daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)
-
-        deps = []
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
-        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
-        daemon_spec.deps = deps
+        daemon_spec.deps = self.get_dependencies(self.mgr)
 
         return daemon_spec
 
@@ -1379,6 +1391,20 @@ class CephfsMirrorService(CephService):
 class CephadmAgent(CephService):
     TYPE = 'agent'
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        agent = mgr.http_server.agent
+        return sorted(
+            [
+                str(mgr.get_mgr_ip()),
+                str(agent.server_port),
+                mgr.cert_mgr.get_root_ca(),
+                str(mgr.get_module_option("device_enhanced_scan")),
+            ]
+        )
+
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         daemon_id, host = daemon_spec.daemon_id, daemon_spec.host
index 7381ef67d7e06ce64265b1a7a1a6391cfddf3468..442458f2711d35397d5a9692a5fb94651cec0c4a 100644 (file)
@@ -2,7 +2,7 @@ import ipaddress
 import logging
 import random
 import string
-from typing import List, Dict, Any, Tuple, cast, Optional
+from typing import List, Dict, Any, Tuple, cast, Optional, TYPE_CHECKING
 
 from ceph.deployment.service_spec import ServiceSpec, IngressSpec
 from mgr_util import build_url
@@ -10,6 +10,9 @@ from cephadm import utils
 from orchestrator import OrchestratorError, DaemonDescription
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService
 
+if TYPE_CHECKING:
+    from ..module import CephadmOrchestrator
+
 logger = logging.getLogger(__name__)
 
 
@@ -17,6 +20,16 @@ class IngressService(CephService):
     TYPE = 'ingress'
     MAX_KEEPALIVED_PASS_LEN = 8
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        if daemon_type == 'haproxy':
+            return IngressService.get_haproxy_dependencies(mgr, spec)
+        elif daemon_type == 'keepalived':
+            return IngressService.get_keepalived_dependencies(mgr, spec)
+        return []
+
     def primary_daemon_type(self, spec: Optional[ServiceSpec] = None) -> str:
         if spec:
             ispec = cast(IngressSpec, spec)
@@ -75,6 +88,18 @@ class IngressService(CephService):
 
         return daemon_spec
 
+    @staticmethod
+    def get_haproxy_dependencies(mgr: "CephadmOrchestrator", spec: Optional[ServiceSpec]) -> List[str]:
+        # because cephadm creates new daemon instances whenever
+        # port or ip changes, identifying daemons by name is
+        # sufficient to detect changes.
+        if not spec:
+            return []
+        ingress_spec = cast(IngressSpec, spec)
+        assert ingress_spec.backend_service
+        daemons = mgr.cache.get_daemons_by_service(ingress_spec.backend_service)
+        return sorted([d.name() for d in daemons])
+
     def haproxy_generate_config(
             self,
             daemon_spec: CephadmDaemonDeploySpec,
@@ -86,7 +111,6 @@ class IngressService(CephService):
                 f'{spec.service_name()} backend service {spec.backend_service} does not exist')
         backend_spec = self.mgr.spec_store[spec.backend_service].spec
         daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service)
-        deps = [d.name() for d in daemons]
 
         # generate password?
         pw_key = f'{spec.service_name()}/monitor_password'
@@ -201,7 +225,7 @@ class IngressService(CephService):
                 ssl_cert = '\n'.join(ssl_cert)
             config_files['files']['haproxy.pem'] = ssl_cert
 
-        return config_files, sorted(deps)
+        return config_files, self.get_haproxy_dependencies(self.mgr, spec)
 
     def keepalived_prepare_create(
             self,
@@ -220,6 +244,16 @@ class IngressService(CephService):
 
         return daemon_spec
 
+    @staticmethod
+    def get_keepalived_dependencies(mgr: "CephadmOrchestrator", spec: Optional[ServiceSpec]) -> List[str]:
+        # because cephadm creates new daemon instances whenever
+        # port or ip changes, identifying daemons by name is
+        # sufficient to detect changes.
+        if not spec:
+            return []
+        daemons = mgr.cache.get_daemons_by_service(spec.service_name())
+        return sorted([d.name() for d in daemons if d.daemon_type == 'haproxy'])
+
     def keepalived_generate_config(
             self,
             daemon_spec: CephadmDaemonDeploySpec,
@@ -252,8 +286,6 @@ class IngressService(CephService):
             raise OrchestratorError(
                 f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}')
 
-        deps = sorted([d.name() for d in daemons if d.daemon_type == 'haproxy'])
-
         host = daemon_spec.host
         hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons])))
 
@@ -394,4 +426,4 @@ class IngressService(CephService):
             }
         }
 
-        return config_file, deps
+        return config_file, self.get_keepalived_dependencies(self.mgr, spec)
index d2773c19d367eb67c9ca4910b518d8d96bc9a9bd..963a845ec74da7fa5ceacde13f6a4d341b57327e 100644 (file)
@@ -2,19 +2,33 @@ import errno
 import json
 import logging
 import subprocess
-from typing import List, cast, Optional
+from typing import List, cast, Optional, TYPE_CHECKING
 from ipaddress import ip_address, IPv6Address
 
 from mgr_module import HandleCommandResult
-from ceph.deployment.service_spec import IscsiServiceSpec
+from ceph.deployment.service_spec import IscsiServiceSpec, ServiceSpec
 
 from orchestrator import DaemonDescription, DaemonDescriptionStatus
 from .cephadmservice import CephadmDaemonDeploySpec, CephService
 from .. import utils
 
+if TYPE_CHECKING:
+    from ..module import CephadmOrchestrator
+
 logger = logging.getLogger(__name__)
 
 
+def get_trusted_ips(mgr: "CephadmOrchestrator", spec: IscsiServiceSpec) -> str:
+    # add active mgr ip address to trusted list so dashboard can access
+    trusted_ip_list = spec.trusted_ip_list if spec.trusted_ip_list else ''
+    mgr_ip = mgr.get_mgr_ip()
+    if mgr_ip not in [s.strip() for s in trusted_ip_list.split(',')]:
+        if trusted_ip_list:
+            trusted_ip_list += ','
+        trusted_ip_list += mgr_ip
+    return trusted_ip_list
+
+
 class IscsiService(CephService):
     TYPE = 'iscsi'
 
@@ -23,15 +37,15 @@ class IscsiService(CephService):
         assert spec.pool
         self.mgr._check_pool_exists(spec.pool, spec.service_name())
 
-    def get_trusted_ips(self, spec: IscsiServiceSpec) -> str:
-        # add active mgr ip address to trusted list so dashboard can access
-        trusted_ip_list = spec.trusted_ip_list if spec.trusted_ip_list else ''
-        mgr_ip = self.mgr.get_mgr_ip()
-        if mgr_ip not in [s.strip() for s in trusted_ip_list.split(',')]:
-            if trusted_ip_list:
-                trusted_ip_list += ','
-            trusted_ip_list += mgr_ip
-        return trusted_ip_list
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        if spec:
+            iscsi_spec = cast(IscsiServiceSpec, spec)
+            return [get_trusted_ips(mgr, iscsi_spec)]
+        else:
+            return [mgr.get_mgr_ip()]
 
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
@@ -68,8 +82,7 @@ class IscsiService(CephService):
                 'val': key_data,
             })
 
-        trusted_ip_list = self.get_trusted_ips(spec)
-
+        trusted_ip_list = get_trusted_ips(self.mgr, spec)
         context = {
             'client_name': '{}.{}'.format(utils.name_to_config_section('iscsi'), igw_id),
             'trusted_ip_list': trusted_ip_list,
@@ -80,7 +93,7 @@ class IscsiService(CephService):
         daemon_spec.keyring = keyring
         daemon_spec.extra_files = {'iscsi-gateway.cfg': igw_conf}
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
-        daemon_spec.deps = [trusted_ip_list]
+        daemon_spec.deps = self.get_dependencies(self.mgr, spec)
         return daemon_spec
 
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
index c83c765d0394af0ac608d360ff8fd4230a04441b..6c415512eefae8254b2f59b812d0ebd0233ea8fb 100644 (file)
@@ -1,8 +1,11 @@
-from typing import List, cast
+from typing import List, cast, Optional, TYPE_CHECKING
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
-from ceph.deployment.service_spec import TracingSpec
+from ceph.deployment.service_spec import TracingSpec, ServiceSpec
 from mgr_util import build_url
 
+if TYPE_CHECKING:
+    from ..module import CephadmOrchestrator
+
 
 class ElasticSearchService(CephadmService):
     TYPE = 'elasticsearch'
@@ -17,19 +20,30 @@ class JaegerAgentService(CephadmService):
     TYPE = 'jaeger-agent'
     DEFAULT_SERVICE_PORT = 6799
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        deps = []  # type: List[str]
+        for dd in mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
+            # scrape jaeger-collector nodes
+            assert dd.hostname is not None
+            port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
+            url = build_url(host=dd.hostname, port=port).lstrip('/')
+            deps.append(url)
+        return sorted(deps)
+
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         collectors = []
-        deps: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
             # scrape jaeger-collector nodes
             assert dd.hostname is not None
             port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
             url = build_url(host=dd.hostname, port=port).lstrip('/')
             collectors.append(url)
-            deps.append(url)
         daemon_spec.final_config = {'collector_nodes': ",".join(collectors)}
-        daemon_spec.deps = sorted(deps)
+        daemon_spec.deps = self.get_dependencies(self.mgr)
         return daemon_spec
 
 
index 0897ce99ff7756944795e1628302fc037ed53a28..0706a9ad17752f168b21d73a90b7a09cb7e3530e 100644 (file)
@@ -1,8 +1,8 @@
 import logging
-from typing import List, Any, Tuple, Dict, cast, TYPE_CHECKING
+from typing import List, Any, Tuple, Dict, cast, Optional, TYPE_CHECKING
 
 from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec
+from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec, ServiceSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints
 
 if TYPE_CHECKING:
@@ -83,8 +83,10 @@ class MgmtGatewayService(CephadmService):
             sd_endpoints.append(f"{addr}:{self.mgr.service_discovery_port}")
         return sd_endpoints
 
-    @staticmethod
-    def get_dependencies(mgr: "CephadmOrchestrator") -> List[str]:
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
         # url_prefix for the following services depends on the presence of mgmt-gateway
         deps = [
             f'{d.name()}:{d.ports[0]}' if d.ports else d.name()
index 9c5b5a112f336b6aab2573d332ab1268991d72a0..bd0620f595f5dc21a7874acc8adefe16e3a0ba38 100644 (file)
@@ -2,7 +2,7 @@ import errno
 import logging
 import os
 import socket
-from typing import List, Any, Tuple, Dict, Optional, cast
+from typing import List, Any, Tuple, Dict, Optional, cast, TYPE_CHECKING
 import ipaddress
 
 from mgr_module import HandleCommandResult
@@ -15,6 +15,9 @@ from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issu
 from ceph.deployment.utils import wrap_ipv6
 from .. import utils
 
+if TYPE_CHECKING:
+    from ..module import CephadmOrchestrator
+
 logger = logging.getLogger(__name__)
 
 
@@ -85,22 +88,26 @@ class GrafanaService(CephadmService):
             'mgmt_gw_ips': ','.join(mgmt_gw_ips),
         })
 
-    def calculate_grafana_deps(self, security_enabled: bool) -> List[str]:
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
 
         deps = []  # type: List[str]
-        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+        security_enabled, mgmt_gw_enabled, _ = mgr._get_security_config()
+        deps.append(f'secure_monitoring_stack:{mgr.secure_monitoring_stack}')
 
         # in case security is enabled we have to reconfig when prom user/pass changes
-        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
+        prometheus_user, prometheus_password = mgr._get_prometheus_credentials()
         if security_enabled and prometheus_user and prometheus_password:
             deps.append(f'{utils.md5_hash(prometheus_user + prometheus_password)}')
 
         # adding a dependency for mgmt-gateway because the usage of url_prefix relies on its presence.
         # another dependency is added for oauth-proxy as Grafana login is delegated to this service when enabled.
         for service in ['prometheus', 'loki', 'mgmt-gateway', 'oauth2-proxy']:
-            deps += [d.name() for d in self.mgr.cache.get_daemons_by_service(service)]
+            deps += [d.name() for d in mgr.cache.get_daemons_by_service(service)]
 
-        return deps
+        return sorted(deps)
 
     def generate_prom_services(self, security_enabled: bool, mgmt_gw_enabled: bool) -> List[str]:
 
@@ -134,7 +141,6 @@ class GrafanaService(CephadmService):
 
         cert, pkey = self.prepare_certificates(daemon_spec)
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
-        deps = self.calculate_grafana_deps(security_enabled)
         grafana_ini = self.generate_grafana_ini(daemon_spec, mgmt_gw_enabled, oauth2_enabled)
         grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, cert, pkey)
         # the path of the grafana dashboards are assumed from the providers.yml.j2 file by grafana
@@ -166,7 +172,7 @@ class GrafanaService(CephadmService):
                     dashboard = f.read()
                     config_file['files'][f'/etc/grafana/provisioning/dashboards/{file_name}'] = dashboard
 
-        return config_file, sorted(deps)
+        return config_file, self.get_dependencies(self.mgr)
 
     def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
         cert = self.mgr.cert_key_store.get_cert('grafana_cert', host=daemon_spec.host)
@@ -297,9 +303,27 @@ class AlertmanagerService(CephadmService):
         cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, "alertmanager_servers"], node_ip)
         return cert, key
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        deps = []
+        deps.append(f'secure_monitoring_stack:{mgr.secure_monitoring_stack}')
+        deps = deps + mgr.cache.get_daemons_by_types(['alertmanager', 'snmp-gateway', 'mgmt-gateway', 'oauth2-proxy'])
+        security_enabled, mgmt_gw_enabled, _ = mgr._get_security_config()
+        if security_enabled:
+            alertmanager_user, alertmanager_password = mgr._get_alertmanager_credentials()
+            if alertmanager_user and alertmanager_password:
+                alertmgr_cred_hash = f'{utils.md5_hash(alertmanager_user + alertmanager_password)}'
+                deps.append(alertmgr_cred_hash)
+
+        if not mgmt_gw_enabled:
+            deps += mgr.cache.get_daemons_by_types(['mgr'])
+
+        return sorted(deps)
+
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        deps: List[str] = []
         default_webhook_urls: List[str] = []
 
         spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
@@ -312,31 +336,17 @@ class AlertmanagerService(CephadmService):
                 user_data['default_webhook_urls'], list):
             default_webhook_urls.extend(user_data['default_webhook_urls'])
 
-        # add a dependency since url_prefix depends on the existence of mgmt-gateway
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
-        # add a dependency since enbling basic-auth (or not) depends on the existence of 'oauth2-proxy'
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('oauth2-proxy')]
-
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
         if mgmt_gw_enabled:
             dashboard_urls = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}/dashboard']
         else:
             dashboard_urls = get_dashboard_urls(self)
-            # scan all mgrs to generate deps and to get standbys too.
-            for dd in self.mgr.cache.get_daemons_by_service('mgr'):
-                # we consider mgr a dep even if the dashboard is disabled
-                # in order to be consistent with _calc_daemon_deps().
-                # when mgmt_gw is enabled there's no need for mgr dep as
-                # mgmt-gw wil route to the active mgr automatically
-                deps.append(dd.name())
 
         snmp_gateway_urls: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
             assert dd.hostname is not None
             assert dd.ports
             addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
-            deps.append(dd.name())
-
             snmp_gateway_urls.append(build_url(scheme='http', host=addr,
                                      port=dd.ports[0], path='/alerts'))
 
@@ -353,7 +363,6 @@ class AlertmanagerService(CephadmService):
         port = 9094
         for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
             assert dd.hostname is not None
-            deps.append(dd.name())
             addr = self.mgr.get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
@@ -364,11 +373,9 @@ class AlertmanagerService(CephadmService):
             if ip_to_bind_to:
                 daemon_spec.port_ips = {str(port): ip_to_bind_to}
 
-        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+        deps = self.get_dependencies(self.mgr)
         if security_enabled:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
-            if alertmanager_user and alertmanager_password:
-                deps.append(f'{utils.md5_hash(alertmanager_user + alertmanager_password)}')
             cert, key = self.get_alertmanager_certificates(daemon_spec)
             context = {
                 'enable_mtls': mgmt_gw_enabled,
@@ -388,7 +395,7 @@ class AlertmanagerService(CephadmService):
                 'web_config': '/etc/alertmanager/web.yml',
                 'use_url_prefix': mgmt_gw_enabled,
                 'ip_to_bind_to': ip_to_bind_to
-            }, sorted(deps)
+            }, deps
         else:
             return {
                 "files": {
@@ -397,7 +404,7 @@ class AlertmanagerService(CephadmService):
                 "peers": peers,
                 'use_url_prefix': mgmt_gw_enabled,
                 'ip_to_bind_to': ip_to_bind_to
-            }, sorted(deps)
+            }, deps
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
         # TODO: if there are multiple daemons, who is the active one?
@@ -632,42 +639,46 @@ class PrometheusService(CephadmService):
         r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
             self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')
 
-        return r, sorted(self.calculate_deps())
+        return r, self.get_dependencies(self.mgr)
 
-    def calculate_deps(self) -> List[str]:
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
         deps = []  # type: List[str]
-        port = cast(int, self.mgr.get_module_option_ex('prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+        port = cast(int, mgr.get_module_option_ex('prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT))
         deps.append(str(port))
-        deps.append(str(self.mgr.service_discovery_port))
-        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
-        security_enabled, mgmt_gw_enabled, _ = self.mgr._get_security_config()
+        deps.append(str(mgr.service_discovery_port))
+        deps.append(f'secure_monitoring_stack:{mgr.secure_monitoring_stack}')
+        security_enabled, mgmt_gw_enabled, _ = mgr._get_security_config()
 
         if not mgmt_gw_enabled:
             # add an explicit dependency on the active manager. This will force to
             # re-deploy prometheus if the mgr has changed (due to a fail-over i.e).
             # when mgmt_gw is enabled there's no need for such dep as mgmt-gw wil
             # route to the active mgr automatically
-            deps.append(self.mgr.get_active_mgr().name())
+            deps.append(mgr.get_active_mgr().name())
 
         if security_enabled:
-            alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
-            prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
+            alertmanager_user, alertmanager_password = mgr._get_alertmanager_credentials()
+            prometheus_user, prometheus_password = mgr._get_prometheus_credentials()
             if prometheus_user and prometheus_password:
                 deps.append(f'{utils.md5_hash(prometheus_user + prometheus_password)}')
             if alertmanager_user and alertmanager_password:
                 deps.append(f'{utils.md5_hash(alertmanager_user + alertmanager_password)}')
 
         # add a dependency since url_prefix depends on the existence of mgmt-gateway
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+        deps += [d.name() for d in mgr.cache.get_daemons_by_service('mgmt-gateway')]
         # add a dependency since enbling basic-auth (or not) depends on the existence of 'oauth2-proxy'
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('oauth2-proxy')]
+        deps += [d.name() for d in mgr.cache.get_daemons_by_service('oauth2-proxy')]
 
         # add dependency on ceph-exporter daemons
-        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')]
-        deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)]
-        if len(self.mgr.cache.get_daemons_by_type('ingress')) > 0:
+        deps += [d.name() for d in mgr.cache.get_daemons_by_service('ceph-exporter')]
+        deps += [s for s in ['node-exporter', 'alertmanager'] if mgr.cache.get_daemons_by_service(s)]
+        if len(mgr.cache.get_daemons_by_type('ingress')) > 0:
             deps.append('ingress')
-        return deps
+
+        return sorted(deps)
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
         # TODO: if there are multiple daemons, who is the active one?
@@ -725,6 +736,15 @@ class NodeExporterService(CephadmService):
     TYPE = 'node-exporter'
     DEFAULT_SERVICE_PORT = 9100
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        deps = []
+        deps.append(f'secure_monitoring_stack:{mgr.secure_monitoring_stack}')
+        deps = deps + mgr.cache.get_daemons_by_types(['mgmt-gateway'])
+        return sorted(deps)
+
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
@@ -794,6 +814,12 @@ class PromtailService(CephadmService):
     TYPE = 'promtail'
     DEFAULT_SERVICE_PORT = 9080
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        return sorted(mgr.cache.get_daemons_by_types(['loki']))
+
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
@@ -801,8 +827,7 @@ class PromtailService(CephadmService):
 
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        deps: List[str] = []
-
+        deps: List[str] = self.get_dependencies(self.mgr)
         daemons = self.mgr.cache.get_daemons_by_service('loki')
         loki_host = ''
         for i, dd in enumerate(daemons):
@@ -810,8 +835,6 @@ class PromtailService(CephadmService):
             if i == 0:
                 loki_host = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
 
-            deps.append(dd.name())
-
         context = {
             'client_hostname': loki_host,
         }
@@ -821,7 +844,7 @@ class PromtailService(CephadmService):
             "files": {
                 "promtail.yml": yml
             }
-        }, sorted(deps)
+        }, deps
 
 
 class SNMPGatewayService(CephadmService):
index 00849da20e334f475e8997a2ff4c1e623bf918df..8ad230b6342b27db33ebd2085cc942957e816a38 100644 (file)
@@ -3,13 +3,16 @@ import ssl
 import base64
 
 from urllib.error import HTTPError, URLError
-from typing import List, Any, Dict, Tuple, Optional, MutableMapping
+from typing import List, Any, Dict, Tuple, Optional, MutableMapping, TYPE_CHECKING
 
 from .cephadmservice import CephadmDaemonDeploySpec, CephService
 from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
 from ceph.utils import http_req
 from orchestrator import OrchestratorError
 
+if TYPE_CHECKING:
+    from ..module import CephadmOrchestrator
+
 
 class NodeProxy(CephService):
     TYPE = 'node-proxy'
@@ -29,6 +32,19 @@ class NodeProxy(CephService):
 
         return daemon_spec
 
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        root_cert = ''
+        server_port = ''
+        try:
+            server_port = str(mgr.http_server.agent.server_port)
+            root_cert = mgr.cert_mgr.get_root_ca()
+        except Exception:
+            pass
+        return sorted([mgr.get_mgr_ip(), server_port, root_cert])
+
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         # node-proxy is re-using the agent endpoint and therefore
         # needs similar checks to see if the endpoint is ready.
@@ -52,8 +68,7 @@ class NodeProxy(CephService):
         }
         config = {'node-proxy.json': json.dumps(cfg)}
 
-        return config, sorted([str(self.mgr.get_mgr_ip()), str(self.agent_endpoint.server_port),
-                               self.mgr.cert_mgr.get_root_ca()])
+        return config, self.get_dependencies(self.mgr)
 
     def handle_hw_monitoring_setting(self) -> bool:
         # function to apply or remove node-proxy service spec depending
index d872219df80896f10b674b21b5786a5cb36d0ffd..edb0f887188802d60053457f80773d545395daf6 100644 (file)
@@ -284,7 +284,7 @@ class TestISCSIService:
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
-    @patch("cephadm.services.iscsi.IscsiService.get_trusted_ips")
+    @patch("cephadm.services.iscsi.get_trusted_ips")
     def test_iscsi_config(self, _get_trusted_ips, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
 
         iscsi_daemon_id = 'testpool.test.qwert'