From cc55847e3d2f92685da63fdb3ba402b2c2905021 Mon Sep 17 00:00:00 2001 From: Adam King Date: Tue, 9 May 2023 15:06:41 -0400 Subject: [PATCH] mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent The jaeger-agents need to know the url for the collector(s) that have been deployed. If a collector moves, or we deployed the agents before the collector, we need to reconfig the agents with updated info about the collectors. Failure to do so can leave the jaeger-agents down, reporting ``` Could not create collector proxy","error":"at least one collector hostPort address is required when resolver is not available" ``` Fixes: https://tracker.ceph.com/issues/59704 Signed-off-by: Adam King (cherry picked from commit acc26d7b17316d1e45bfc3a882355b46db19d053) --- src/pybind/mgr/cephadm/module.py | 7 +++++++ src/pybind/mgr/cephadm/serve.py | 5 +++++ src/pybind/mgr/cephadm/services/jaeger.py | 3 +++ 3 files changed, 15 insertions(+) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index b59cf6687f9f4..6209ae5770fc8 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -41,6 +41,7 @@ from cephadm.agent import CephadmAgentHelpers from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +from mgr_util import build_url import orchestrator from orchestrator.module import to_format, Format @@ -2813,6 +2814,12 @@ Then run the following: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': deps += get_daemon_names(['loki']) + elif daemon_type == JaegerAgentService.TYPE: + for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE): + assert dd.hostname is not None + port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT + deps.append(build_url(host=dd.hostname, port=port).lstrip('/')) + deps = sorted(deps) else: # TODO(redo): some error message! 
pass diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 21713960daeb2..dbbdb68d3c4bc 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1073,6 +1073,11 @@ class CephadmServe: diff = list(set(last_deps) - set(deps)) if any('secure_monitoring_stack' in e for e in diff): action = 'redeploy' + elif dd.daemon_type == 'jaeger-agent': + # changes to jaeger-agent deps affect the way the unit.run for + # the daemon is written, which we rewrite on redeploy, but not + # on reconfig. + action = 'redeploy' elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args: self.log.debug( diff --git a/src/pybind/mgr/cephadm/services/jaeger.py b/src/pybind/mgr/cephadm/services/jaeger.py index c136d20e612a3..c83c765d0394a 100644 --- a/src/pybind/mgr/cephadm/services/jaeger.py +++ b/src/pybind/mgr/cephadm/services/jaeger.py @@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService): def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: assert self.TYPE == daemon_spec.daemon_type collectors = [] + deps: List[str] = [] for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE): # scrape jaeger-collector nodes assert dd.hostname is not None port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT url = build_url(host=dd.hostname, port=port).lstrip('/') collectors.append(url) + deps.append(url) daemon_spec.final_config = {'collector_nodes': ",".join(collectors)} + daemon_spec.deps = sorted(deps) return daemon_spec -- 2.39.5