mgr/cephadm: fixing prometheus port handling

author Adam King <adking@redhat.com>

Wed, 2 Mar 2022 05:23:52 +0000 (00:23 -0500)

committer Adam King <adking@redhat.com>

Tue, 5 Apr 2022 20:10:22 +0000 (16:10 -0400)
author Adam King <adking@redhat.com>
Wed, 2 Mar 2022 05:23:52 +0000 (00:23 -0500)
committer Adam King <adking@redhat.com>
Tue, 5 Apr 2022 20:10:22 +0000 (16:10 -0400)
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py

index 5a633eae172f7e2c1c9b623c2bb6201fa2f58a61..44fe7f09d0718ea3da6b932607ab38b5ebb0ee5c 100644 (file)
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2427,6 +2427,8 @@ Then run the following:
              for dep_type in need.get(daemon_type, []):
                  for dd in self.cache.get_daemons_by_type(dep_type):
                      deps.append(dd.name())
+            if daemon_type == 'prometheus':
+                deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283)))
          return sorted(deps)
  
      @forall_hosts
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py

index 34ae333d6678ed1d6b94a10a200ab0d734db7649..e0682b1a3263a0e587d0c0b9ad6607ccb3f231f6 100644 (file)
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -688,7 +688,7 @@ class CephadmServe:
                  slot = slot.assign_name(self.mgr.get_unique_name(
                      slot.daemon_type,
                      slot.hostname,
-                    daemons,
+                    [d for d in daemons if d not in daemons_to_remove],
                      prefix=spec.service_id,
                      forcename=slot.name,
                      rank=slot.rank,
@@ -718,18 +718,20 @@ class CephadmServe:
              # create daemons
              daemon_place_fails = []
              for slot in slots_to_add:
-                # first remove daemon on conflicting port?
-                if slot.ports:
+                # first remove daemon with conflicting port or name?
+                if slot.ports or slot.name in [d.name() for d in daemons_to_remove]:
                      for d in daemons_to_remove:
-                        if d.hostname != slot.hostname:
+                        if (
+                            d.hostname != slot.hostname
+                            or not (set(d.ports or []) & set(slot.ports))
+                            or (d.ip and slot.ip and d.ip != slot.ip)
+                            and d.name() != slot.name
+                        ):
                              continue
-                        if not (set(d.ports or []) & set(slot.ports)):
-                            continue
-                        if d.ip and slot.ip and d.ip != slot.ip:
-                            continue
-                        self.log.info(
-                            f'Removing {d.name()} before deploying to {slot} to avoid a port conflict'
-                        )
+                        if d.name() != slot.name:
+                            self.log.info(
+                                f'Removing {d.name()} before deploying to {slot} to avoid a port or conflict'
+                            )
                          # NOTE: we don't check ok-to-stop here to avoid starvation if
                          # there is only 1 gateway.
                          self._remove_daemon(d.name(), d.hostname)
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py

index 3b738a330bec03c94772f03ae798a92bc815b91c..13d0ff497f7d5658e9b6cb4296bfcfac1d6093eb 100644 (file)
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -217,6 +217,7 @@ class AlertmanagerService(CephadmService):
  class PrometheusService(CephadmService):
      TYPE = 'prometheus'
      DEFAULT_SERVICE_PORT = 9095
+    DEFAULT_MGR_PROMETHEUS_PORT = 9283
  
      def config(self, spec: ServiceSpec) -> None:
          # make sure module is enabled
@@ -247,13 +248,19 @@ class PrometheusService(CephadmService):
          # scrape mgrs
          mgr_scrape_list = []
          mgr_map = self.mgr.get('mgr_map')
-        port = None
+        port = cast(int, self.mgr.get_module_option_ex(
+            'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+        deps.append(str(port))
          t = mgr_map.get('services', {}).get('prometheus', None)
          if t:
              p_result = urlparse(t)
-            t = t.split('/')[2]
-            mgr_scrape_list.append(t)
-            port = p_result.port or 9283
+            # urlparse's .hostname strips the '[]' from IPv6 literals, so
+            # when the original netloc was bracketed we put the brackets
+            # back when building the final scrape endpoint
+            if '[' in p_result.netloc and ']' in p_result.netloc:
+                mgr_scrape_list.append(f"[{p_result.hostname}]:{port}")
+            else:
+                mgr_scrape_list.append(f"{p_result.hostname}:{port}")
          # scan all mgrs to generate deps and to get standbys too.
          # assume that they are all on the same port as the active mgr.
          for dd in self.mgr.cache.get_daemons_by_service('mgr'):
diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py

index 40a8ad6360ca828e6ee49e57cdd6ab6990407194..eef15e83056870cb9fe59e8eb5f4d1ebc714fafd 100644 (file)
--- a/src/pybind/mgr/cephadm/tests/fixtures.py
+++ b/src/pybind/mgr/cephadm/tests/fixtures.py
@@ -19,6 +19,13 @@ def get_ceph_option(_, key):
      return __file__
  
  
+def get_module_option_ex(_, module, key, default=None):
+    if module == 'prometheus':
+        if key == 'server_port':
+            return 9283
+    return None
+
+
  def _run_cephadm(ret):
      def foo(s, host, entity, cmd, e, **kwargs):
          if cmd == 'gather-facts':
@@ -41,6 +48,7 @@ def with_cephadm_module(module_options=None, store=None):
      """
      with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option),\
              mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \
+            mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex),\
              mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
              mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
              mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'):
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py

index a753cfd9fb9bf066cb7dfded5b18e4caf980722a..a5823a8eedabac83a0dc241908bc7dd9a88aa19b 100644 (file)
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -301,7 +301,7 @@ class TestMonitoring:
                      honor_labels: true
                      static_configs:
                      - targets:
-                      - '[::1]:8081'
+                      - '[::1]:9283'
  
                    - job_name: 'node'
                      static_configs:
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py

index c51c9ab3bbe4a7efaa1e238fbf6dbf6556b2de41..7890b5eec92793d0ea96fbc5f083d68361c07365 100644 (file)
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -550,7 +550,10 @@ class Module(MgrModule):
          ),
          Option(
              'server_port',
-            type='int'
+            type='int',
+            default=DEFAULT_PORT,
+            desc='the port on which the module listens for HTTP requests',
+            runtime=True
          ),
          Option(
              'scrape_interval',
@@ -815,6 +818,31 @@ class Module(MgrModule):
  
          return metrics
  
+    def get_server_addr(self) -> str:
+        """
+        Return the current mgr server IP.
+        """
+        server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
+        if server_addr in ['::', '0.0.0.0']:
+            return self.get_mgr_ip()
+        return server_addr
+
+    def config_notify(self) -> None:
+        """
+        This method is called whenever one of our config options is changed.
+        """
+        # https://stackoverflow.com/questions/7254845/change-cherrypy-port-and-restart-web-server
+        # if we omit the line: cherrypy.server.httpserver = None
+        # then the cherrypy server is not restarted correctly
+        self.log.info('Restarting engine...')
+        cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
+        server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
+        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
+        cherrypy.config.update({'server.socket_port': server_port})
+        cherrypy.engine.start()
+        self.log.info('Engine started.')
+
      @profile_method()
      def get_health(self) -> None:
  
@@ -1728,9 +1756,7 @@ class Module(MgrModule):
          })
          # Publish the URI that others may use to access the service we're
          # about to start serving
-        if server_addr in ['::', '0.0.0.0']:
-            server_addr = self.get_mgr_ip()
-        self.set_uri(build_url(scheme='http', host=server_addr, port=server_port, path='/'))
+        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
  
          cherrypy.tree.mount(Root(), "/")
          self.log.info('Starting engine...')
@@ -1742,6 +1768,7 @@ class Module(MgrModule):
          # tell metrics collection thread to stop collecting new metrics
          self.metrics_thread.stop()
          cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
          self.log.info('Engine stopped.')
          self.shutdown_rbd_stats()
          # wait for the metrics collection thread to stop
@@ -1838,6 +1865,7 @@ class StandbyModule(MgrStandbyModule):
          self.shutdown_event.wait()
          self.shutdown_event.clear()
          cherrypy.engine.stop()
+        cherrypy.server.httpserver = None
          self.log.info('Engine stopped.')
  
      def shutdown(self) -> None:
author	Adam King <adking@redhat.com>
	Wed, 2 Mar 2022 05:23:52 +0000 (00:23 -0500)
committer	Adam King <adking@redhat.com>
	Tue, 5 Apr 2022 20:10:22 +0000 (16:10 -0400)
src/pybind/mgr/cephadm/module.py		patch | blob | history
src/pybind/mgr/cephadm/serve.py		patch | blob | history
src/pybind/mgr/cephadm/services/monitoring.py		patch | blob | history
src/pybind/mgr/cephadm/tests/fixtures.py		patch | blob | history
src/pybind/mgr/cephadm/tests/test_services.py		patch | blob | history
src/pybind/mgr/prometheus/module.py		patch | blob | history