git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: make jaeger-collector urls a dep for jaeger-agent 51416/head
author: Adam King <adking@redhat.com>
Tue, 9 May 2023 19:06:41 +0000 (15:06 -0400)
committer: Adam King <adking@redhat.com>
Mon, 2 Oct 2023 17:14:58 +0000 (13:14 -0400)
The jaeger-agents need to know the URL(s) of the collector(s)
that have been deployed. If a collector moves, or we deployed
the agents before the collector, we need to reconfig the agents
with updated info about the collectors. Failure to do so can
leave the jaeger-agents down, reporting:

```
Could not create collector proxy","error":"at least one collector hostPort address is required when resolver is not available"
```

Fixes: https://tracker.ceph.com/issues/59704
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/jaeger.py

index 4b6f7cf7a567246b56692b2ed4c7eebb6b38c735..70d66732f0f5c0d3e928322f695bfae1fc139b2b 100644 (file)
@@ -40,6 +40,7 @@ from cephadm.agent import CephadmAgentHelpers
 
 
 from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_util import build_url
 import orchestrator
 from orchestrator.module import to_format, Format
 
@@ -2712,6 +2713,12 @@ Then run the following:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'promtail':
             deps += get_daemon_names(['loki'])
+        elif daemon_type == JaegerAgentService.TYPE:
+            for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
+                assert dd.hostname is not None
+                port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
+                deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
+            deps = sorted(deps)
         else:
             # TODO(redo): some error message!
             pass
index a17ac151e3a68eea7ee88598fc0accb953eccf3e..116e97238691995b05392b2d063c94c14b095b00 100644 (file)
@@ -1060,6 +1060,11 @@ class CephadmServe:
                     diff = list(set(last_deps) - set(deps))
                     if any('secure_monitoring_stack' in e for e in diff):
                         action = 'redeploy'
+                elif dd.daemon_type == 'jaeger-agent':
+                    # changes to jaeger-agent deps affect the way the unit.run for
+                    # the daemon is written, which we rewrite on redeploy, but not
+                    # on reconfig.
+                    action = 'redeploy'
 
             elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
                 self.log.debug(
index c136d20e612a38bacd53328339ecc52d4337a284..c83c765d0394af0ac608d360ff8fd4230a04441b 100644 (file)
@@ -20,13 +20,16 @@ class JaegerAgentService(CephadmService):
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
         assert self.TYPE == daemon_spec.daemon_type
         collectors = []
+        deps: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
             # scrape jaeger-collector nodes
             assert dd.hostname is not None
             port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
             url = build_url(host=dd.hostname, port=port).lstrip('/')
             collectors.append(url)
+            deps.append(url)
         daemon_spec.final_config = {'collector_nodes': ",".join(collectors)}
+        daemon_spec.deps = sorted(deps)
         return daemon_spec