From: Adam King Date: Tue, 9 May 2023 19:06:41 +0000 (-0400) Subject: mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent X-Git-Tag: v18.2.4~196^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F56089%2Fhead;p=ceph.git mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent The jaeger-agents need to know the URL for the collector(s) that have been deployed. If a collector moves, or we deployed the agents before the collector, we need to reconfig the agents with updated info about the collectors. Failure to do so can leave the jaeger-agents down, reporting ``` Could not create collector proxy","error":"at least one collector hostPort address is required when resolver is not available" ``` Fixes: https://tracker.ceph.com/issues/59704 Signed-off-by: Adam King (cherry picked from commit acc26d7b17316d1e45bfc3a882355b46db19d053) --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index b59cf6687f9..6209ae5770f 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -41,6 +41,7 @@ from cephadm.agent import CephadmAgentHelpers from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +from mgr_util import build_url import orchestrator from orchestrator.module import to_format, Format @@ -2813,6 +2814,12 @@ Then run the following: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': deps += get_daemon_names(['loki']) + elif daemon_type == JaegerAgentService.TYPE: + for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE): + assert dd.hostname is not None + port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT + deps.append(build_url(host=dd.hostname, port=port).lstrip('/')) + deps = sorted(deps) else: # TODO(redo): some error message! 
pass diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 21713960dae..dbbdb68d3c4 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1073,6 +1073,11 @@ class CephadmServe: diff = list(set(last_deps) - set(deps)) if any('secure_monitoring_stack' in e for e in diff): action = 'redeploy' + elif dd.daemon_type == 'jaeger-agent': + # changes to jaeger-agent deps affect the way the unit.run for + # the daemon is written, which we rewrite on redeploy, but not + # on reconfig. + action = 'redeploy' elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args: self.log.debug( diff --git a/src/pybind/mgr/cephadm/services/jaeger.py b/src/pybind/mgr/cephadm/services/jaeger.py index c136d20e612..c83c765d039 100644 --- a/src/pybind/mgr/cephadm/services/jaeger.py +++ b/src/pybind/mgr/cephadm/services/jaeger.py @@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService): def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: assert self.TYPE == daemon_spec.daemon_type collectors = [] + deps: List[str] = [] for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE): # scrape jaeger-collector nodes assert dd.hostname is not None port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT url = build_url(host=dd.hostname, port=port).lstrip('/') collectors.append(url) + deps.append(url) daemon_spec.final_config = {'collector_nodes': ",".join(collectors)} + daemon_spec.deps = sorted(deps) return daemon_spec