From: Adam King
Date: Wed, 2 Mar 2022 05:23:52 +0000 (-0500)
Subject: mgr/cephadm: fixing prometheus port handling
X-Git-Tag: v16.2.8~27^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b9fe7392f56dacca9dc2b106157035fc1bf5ead8;p=ceph.git

mgr/cephadm: fixing prometheus port handling

Fixes: https://tracker.ceph.com/issues/51072
Signed-off-by: Redouane Kachach
(cherry picked from commit 8eb1397d77dace25f387e88137a1807993a0796d)

Conflicts:
	src/pybind/mgr/prometheus/module.py
---

Reviewer notes (amendments relative to the commit referenced above; hunk
line counts are unchanged, but the post-image blob id on the serve.py
"index" line no longer matches the amended content):
 * cephadm/serve.py: the three "no port conflict" tests are now grouped in
   parentheses so that "and d.name() != slot.name" guards all of them.
   Python binds "and" tighter than "or", so as written the name test only
   qualified the IP test; with an empty slot.ports the port-overlap test
   is always true and a same-named daemon was never removed.
 * cephadm/serve.py: log message "a port or conflict" corrected to
   "a port or name conflict".

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 5a633eae172f7..44fe7f09d0718 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2427,6 +2427,8 @@ Then run the following:
         for dep_type in need.get(daemon_type, []):
             for dd in self.cache.get_daemons_by_type(dep_type):
                 deps.append(dd.name())
+        if daemon_type == 'prometheus':
+            deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283)))
         return sorted(deps)
 
     @forall_hosts
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 34ae333d6678e..e0682b1a3263a 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -688,7 +688,7 @@ class CephadmServe:
                 slot = slot.assign_name(self.mgr.get_unique_name(
                     slot.daemon_type,
                     slot.hostname,
-                    daemons,
+                    [d for d in daemons if d not in daemons_to_remove],
                     prefix=spec.service_id,
                     forcename=slot.name,
                     rank=slot.rank,
@@ -718,18 +718,20 @@ class CephadmServe:
             # create daemons
             daemon_place_fails = []
             for slot in slots_to_add:
-                # first remove daemon on conflicting port?
-                if slot.ports:
+                # first remove daemon with conflicting port or name?
+                if slot.ports or slot.name in [d.name() for d in daemons_to_remove]:
                     for d in daemons_to_remove:
-                        if d.hostname != slot.hostname:
+                        if (
+                            (d.hostname != slot.hostname
+                             or not (set(d.ports or []) & set(slot.ports))
+                             or (d.ip and slot.ip and d.ip != slot.ip))
+                            and d.name() != slot.name
+                        ):
                             continue
-                        if not (set(d.ports or []) & set(slot.ports)):
-                            continue
-                        if d.ip and slot.ip and d.ip != slot.ip:
-                            continue
-                        self.log.info(
-                            f'Removing {d.name()} before deploying to {slot} to avoid a port conflict'
-                        )
+                        if d.name() != slot.name:
+                            self.log.info(
+                                f'Removing {d.name()} before deploying to {slot} to avoid a port or name conflict'
+                            )
                         # NOTE: we don't check ok-to-stop here to avoid starvation if
                         # there is only 1 gateway.
                         self._remove_daemon(d.name(), d.hostname)
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 3b738a330bec0..13d0ff497f7d5 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -217,6 +217,7 @@ class AlertmanagerService(CephadmService):
 class PrometheusService(CephadmService):
     TYPE = 'prometheus'
     DEFAULT_SERVICE_PORT = 9095
+    DEFAULT_MGR_PROMETHEUS_PORT = 9283
 
     def config(self, spec: ServiceSpec) -> None:
         # make sure module is enabled
@@ -247,13 +248,19 @@ class PrometheusService(CephadmService):
         # scrape mgrs
         mgr_scrape_list = []
         mgr_map = self.mgr.get('mgr_map')
-        port = None
+        port = cast(int, self.mgr.get_module_option_ex(
+            'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+        deps.append(str(port))
         t = mgr_map.get('services', {}).get('prometheus', None)
         if t:
             p_result = urlparse(t)
-            t = t.split('/')[2]
-            mgr_scrape_list.append(t)
-            port = p_result.port or 9283
+            # urlparse .hostname removes '[]' from the hostname in case
+            # of ipv6 addresses so if this is the case then we just
+            # append the brackets when building the final scrape endpoint
+            if '[' in p_result.netloc and ']' in p_result.netloc:
+                mgr_scrape_list.append(f"[{p_result.hostname}]:{port}")
+            else:
+                mgr_scrape_list.append(f"{p_result.hostname}:{port}")
         # scan all mgrs to generate deps and to get standbys too.
         # assume that they are all on the same port as the active mgr.
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py
index 40a8ad6360ca8..eef15e8305687 100644
--- a/src/pybind/mgr/cephadm/tests/fixtures.py
+++ b/src/pybind/mgr/cephadm/tests/fixtures.py
@@ -19,6 +19,13 @@ def get_ceph_option(_, key):
     return __file__
 
 
+def get_module_option_ex(_, module, key, default=None):
+    if module == 'prometheus':
+        if key == 'server_port':
+            return 9283
+    return None
+
+
 def _run_cephadm(ret):
     def foo(s, host, entity, cmd, e, **kwargs):
         if cmd == 'gather-facts':
@@ -41,6 +48,7 @@ def with_cephadm_module(module_options=None, store=None):
     """
     with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option),\
             mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \
+            mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex),\
             mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
             mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
             mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'):
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index a753cfd9fb9bf..a5823a8eedaba 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -301,7 +301,7 @@ class TestMonitoring:
                     honor_labels: true
                     static_configs:
                     - targets:
-                      - '[::1]:8081'
+                      - '[::1]:9283'
 
                   - job_name: 'node'
                     static_configs:
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index c51c9ab3bbe4a..7890b5eec9279 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -550,7 +550,10 @@ class Module(MgrModule):
         ),
         Option(
             'server_port',
-            type='int'
+            type='int',
+            default=DEFAULT_PORT,
+            desc='the port on which the module listens for HTTP requests',
+            runtime=True
         ),
         Option(
             'scrape_interval',
@@ -815,6 +818,31 @@ class Module(MgrModule):
 
         return metrics
 
+    def get_server_addr(self) -> str:
+        """
+        Return the current mgr server IP.
+        """
+        server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
+        if server_addr in ['::', '0.0.0.0']:
+            return self.get_mgr_ip()
+        return server_addr
+
+    def config_notify(self) -> None:
+        """
+        This method is called whenever one of our config options is changed.
+        """
+        # https://stackoverflow.com/questions/7254845/change-cherrypy-port-and-restart-web-server
+        # if we omit the line: cherrypy.server.httpserver = None
+        # then the cherrypy server is not restarted correctly
+        self.log.info('Restarting engine...')
+        cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
+        server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
+        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
+        cherrypy.config.update({'server.socket_port': server_port})
+        cherrypy.engine.start()
+        self.log.info('Engine started.')
+
     @profile_method()
     def get_health(self) -> None:
 
@@ -1728,9 +1756,7 @@ class Module(MgrModule):
         })
         # Publish the URI that others may use to access the service we're
         # about to start serving
-        if server_addr in ['::', '0.0.0.0']:
-            server_addr = self.get_mgr_ip()
-        self.set_uri(build_url(scheme='http', host=server_addr, port=server_port, path='/'))
+        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
 
         cherrypy.tree.mount(Root(), "/")
         self.log.info('Starting engine...')
@@ -1742,6 +1768,7 @@ class Module(MgrModule):
         # tell metrics collection thread to stop collecting new metrics
         self.metrics_thread.stop()
         cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
         self.log.info('Engine stopped.')
         self.shutdown_rbd_stats()
         # wait for the metrics collection thread to stop
@@ -1838,6 +1865,7 @@ class StandbyModule(MgrStandbyModule):
         self.shutdown_event.wait()
         self.shutdown_event.clear()
         cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
         self.log.info('Engine stopped.')
 
     def shutdown(self) -> None: