git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent 56089/head
author: Adam King <adking@redhat.com>
        Tue, 9 May 2023 19:06:41 +0000 (15:06 -0400)
committer: Adam King <adking@redhat.com>
        Sun, 10 Mar 2024 19:44:05 +0000 (15:44 -0400)
The jaeger-agents need to know the URL for the collector(s)
that have been deployed. If a collector moves, or we deployed
the agents before the collector, we need to reconfig the agents
with updated info about the collectors. Failure to do so can
leave the jaeger-agents down, reporting:

```
Could not create collector proxy","error":"at least one collector hostPort address is required when resolver is not available"
```

Fixes: https://tracker.ceph.com/issues/59704
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit acc26d7b17316d1e45bfc3a882355b46db19d053)

src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/jaeger.py

index b59cf6687f9f46df331ebdb051deef25c7b459d6..6209ae5770fc810cf8377b69f918098c2c10ebea 100644 (file)
@@ -41,6 +41,7 @@ from cephadm.agent import CephadmAgentHelpers
 
 
 from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_util import build_url
 import orchestrator
 from orchestrator.module import to_format, Format
 
@@ -2813,6 +2814,12 @@ Then run the following:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'promtail':
             deps += get_daemon_names(['loki'])
+        elif daemon_type == JaegerAgentService.TYPE:
+            for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
+                assert dd.hostname is not None
+                port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
+                deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
+            deps = sorted(deps)
         else:
             # TODO(redo): some error message!
             pass
index 21713960daeb25a18b81b3781cd558c034133df6..dbbdb68d3c4bcece2d831ef40cbd258fe6895f38 100644 (file)
@@ -1073,6 +1073,11 @@ class CephadmServe:
                     diff = list(set(last_deps) - set(deps))
                     if any('secure_monitoring_stack' in e for e in diff):
                         action = 'redeploy'
+                elif dd.daemon_type == 'jaeger-agent':
+                    # changes to jaeger-agent deps affect the way the unit.run for
+                    # the daemon is written, which we rewrite on redeploy, but not
+                    # on reconfig.
+                    action = 'redeploy'
 
             elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
                 self.log.debug(
index c136d20e612a38bacd53328339ecc52d4337a284..c83c765d0394af0ac608d360ff8fd4230a04441b 100644 (file)
@@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService):
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         collectors = []
+        deps: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
             # scrape jaeger-collector nodes
             assert dd.hostname is not None
             port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
             url = build_url(host=dd.hostname, port=port).lstrip('/')
             collectors.append(url)
+            deps.append(url)
         daemon_spec.final_config = {'collector_nodes': ",".join(collectors)}
+        daemon_spec.deps = sorted(deps)
         return daemon_spec