]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: add ssl to prometheus federation
author Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com>
Tue, 5 Mar 2024 04:27:42 +0000 (09:57 +0530)
committer Aashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com>
Wed, 11 Sep 2024 05:24:16 +0000 (10:54 +0530)
Fixes: https://tracker.ceph.com/issues/65057
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/dashboard/controllers/multi_cluster.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py

index 0bca599961e33fe01256cbe1668d0920ba559113..021ec23f135d4c1182677de3d66dc740dee54b8b 100644 (file)
@@ -3184,6 +3184,16 @@ Then run the following:
         self.set_store(PrometheusService.USER_CFG_KEY, user)
         self.set_store(PrometheusService.PASS_CFG_KEY, password)
         return 'prometheus credentials updated correctly'
+    
+    @handle_orch_error
+    def set_prometheus_cert(self, cert: str) -> str:
+        self.set_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY, cert)
+        return 'prometheus cert stored correctly'
+    
+    @handle_orch_error
+    def get_prometheus_cert(self) -> str:
+        prometheus_cert = self.get_store(PrometheusService.PROMETHEUS_CERT_CFG_KEY)
+        return prometheus_cert
 
     @handle_orch_error
     def set_custom_prometheus_alerts(self, alerts_file: str) -> str:
@@ -3195,6 +3205,9 @@ Then run the following:
 
     @handle_orch_error
     def set_prometheus_target(self, url: str) -> str:
+        valid_url_pattern = r"^(?!http:\/\/)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5})$"
+        if re.match(valid_url_pattern, url) is None:
+            return f"Invalid URL '{url}'. It should be in the format host_ip:port"
         prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
         if url not in prometheus_spec.targets:
             prometheus_spec.targets.append(url)
index 5d5d26ecad13af9c431f2dbfe720d668ffdd59f0..7127406dd5964d3cb7ceb9b7e177bcab7bdf5826 100644 (file)
@@ -1,5 +1,6 @@
 import errno
 import logging
+import json
 import os
 import socket
 from typing import List, Any, Tuple, Dict, Optional, cast
@@ -435,6 +436,7 @@ class PrometheusService(CephadmService):
     DEFAULT_MGR_PROMETHEUS_PORT = 9283
     USER_CFG_KEY = 'prometheus/web_user'
     PASS_CFG_KEY = 'prometheus/web_password'
+    PROMETHEUS_CERT_CFG_KEY = 'prometheus/cert'
 
     def config(self, spec: ServiceSpec) -> None:
         # make sure module is enabled
@@ -509,6 +511,18 @@ class PrometheusService(CephadmService):
         prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
         FSID = self.mgr._cluster_fsid
 
+        clusters_credentials = {}
+        multi_cluster_config_raw = str(self.mgr.get_module_option_ex('dashboard', 'MULTICLUSTER_CONFIG'))
+        multi_cluster_config_str = multi_cluster_config_raw.replace("'", '"')
+        valid_multi_cluster_config_str = multi_cluster_config_str.replace('True', '"True"').replace('False', '"False"')
+        multi_cluster_config = json.loads(valid_multi_cluster_config_str)
+        if multi_cluster_config:
+            for url in targets:
+                credentials = self.find_prometheus_credentials(multi_cluster_config, url)
+                if credentials:
+                    clusters_credentials[url] = credentials
+                    clusters_credentials[url]['cert_file_name'] = ''
+
         # generate the prometheus configuration
         context = {
             'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/',
@@ -526,7 +540,8 @@ class PrometheusService(CephadmService):
             'external_prometheus_targets': targets,
             'cluster_fsid': FSID,
             'nfs_sd_url': nfs_sd_url,
-            'smb_sd_url': smb_sd_url
+            'smb_sd_url': smb_sd_url,
+            'clusters_credentials': clusters_credentials
         }
 
         ip_to_bind_to = ''
@@ -544,6 +559,11 @@ class PrometheusService(CephadmService):
         }
 
         if security_enabled:
+            r2: Dict[str, Any] = {'files': {}}
+            for url, credentials in clusters_credentials.items():
+                r2['files'][f'prometheus_{url}_cert.crt'] = credentials['certificate']
+                credentials['cert_file_name'] = f'prometheus_{url}_cert.crt'
+                context['clusters_credentials'] = clusters_credentials
             # Following key/cert are needed for:
             # 1- run the prometheus server (web.yml config)
             # 2- use mTLS to scrape node-exporter (prometheus acts as client)
@@ -674,6 +694,16 @@ class PrometheusService(CephadmService):
             return HandleCommandResult(-errno.EBUSY, '', warn_message)
         return HandleCommandResult(0, warn_message, '')
 
+    def find_prometheus_credentials(self, multicluster_config, url):
+        for cluster_id, clusters in multicluster_config['config'].items():
+            for cluster in clusters:
+                prometheus_url = cluster.get('prometheus_url')
+                if prometheus_url:
+                    valid_url = prometheus_url.replace("https://", "").replace("http://", "")
+                    if valid_url == url:
+                        return cluster.get('prometheus_access_info')
+        return None
+
 
 class NodeExporterService(CephadmService):
     TYPE = 'node-exporter'
index e769486775591ca55b360e666b5cf058d0984f81..3170e4ea2e91decc2f369a7b9bbdd99677273f31 100644 (file)
@@ -2,10 +2,8 @@
 global:
   scrape_interval: 10s
   evaluation_interval: 10s
-{% if not security_enabled %}
   external_labels:
     cluster: {{ cluster_fsid }}
-{% endif %}
 
 rule_files:
   - /etc/prometheus/alerting/*
@@ -39,15 +37,18 @@ alerting:
 
 scrape_configs:
   - job_name: 'ceph'
+    relabel_configs:
+    - source_labels: [__address__]
+      target_label: cluster
+      replacement: {{ cluster_fsid }}
+    - source_labels: [instance]
+      target_label: instance
+      replacement: 'ceph_cluster'
 {% if security_enabled %}
     scheme: https
     tls_config:
       ca_file: root_cert.pem
     honor_labels: true
-    relabel_configs:
-    - source_labels: [instance]
-      target_label: instance
-      replacement: 'ceph_cluster'
     http_sd_configs:
     - url: {{ mgr_prometheus_sd_url }}
       basic_auth:
@@ -57,19 +58,16 @@ scrape_configs:
         ca_file: root_cert.pem
 {% else %}
     honor_labels: true
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
-    - source_labels: [instance]
-      target_label: instance
-      replacement: 'ceph_cluster'
     http_sd_configs:
     - url: {{ mgr_prometheus_sd_url }}
 {% endif %}
 
 {% if node_exporter_sd_url %}
   - job_name: 'node'
+    relabel_configs:
+    - source_labels: [__address__]
+      target_label: cluster
+      replacement: {{ cluster_fsid }}
 {% if security_enabled %}
     scheme: https
     tls_config:
@@ -86,15 +84,15 @@ scrape_configs:
 {% else %}
     http_sd_configs:
     - url: {{ node_exporter_sd_url }}
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
 {% endif %}
 {% endif %}
 
 {% if haproxy_sd_url %}
   - job_name: 'haproxy'
+    relabel_configs:
+    - source_labels: [__address__]
+      target_label: cluster
+      replacement: {{ cluster_fsid }}
 {% if security_enabled %}
     scheme: https
     tls_config:
@@ -109,15 +107,15 @@ scrape_configs:
 {% else %}
     http_sd_configs:
     - url: {{ haproxy_sd_url }}
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
 {% endif %}
 {% endif %}
 
 {% if ceph_exporter_sd_url %}
   - job_name: 'ceph-exporter'
+    relabel_configs:
+    - source_labels: [__address__]
+      target_label: cluster
+      replacement: {{ cluster_fsid }}
 {% if security_enabled %}
     honor_labels: true
     scheme: https
@@ -132,10 +130,6 @@ scrape_configs:
         ca_file: root_cert.pem
 {% else %}
     honor_labels: true
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
     http_sd_configs:
     - url: {{ ceph_exporter_sd_url }}
 {% endif %}
@@ -201,17 +195,27 @@ scrape_configs:
 {% endif %}
 {% endif %}
 
-{% if not security_enabled %}
+{% for url, details in clusters_credentials.items() %}
   - job_name: 'federate'
     scrape_interval: 15s
     honor_labels: true
     metrics_path: '/federate'
+{% if secure_monitoring_stack %}
+    scheme: https
+    tls_config:
+      ca_file: {{ details['cert_file_name'] }}
+    basic_auth:
+      username: {{ details['user'] }}
+      password: {{ details['password'] }}
+{% endif %}
     params:
       'match[]':
         - '{job="ceph"}'
         - '{job="node"}'
         - '{job="haproxy"}'
         - '{job="ceph-exporter"}'
+        - '{job="nvmeof"}'
     static_configs:
-    - targets: {{ external_prometheus_targets }}
-{% endif %}
+    - targets: ['{{ url }}']
+{% endfor %}
+
index f7e6d516b49d971e10ef492bfa1d7cbabdee4afc..1551f0969ff74e3fc72f926e93255730eb2ddcaf 100644 (file)
@@ -107,14 +107,18 @@ class MultiCluster(RESTController):
             prometheus_url = self._proxy('GET', url, 'api/multi-cluster/get_prometheus_api_url',
                                          token=cluster_token, verify=ssl_verify,
                                          cert=ssl_certificate)
+            
+            prometheus_access_info = self._proxy('GET', url,
+                                                 'ui-api/multi-cluster/get_prometheus_access_info',  # noqa E501 #pylint: disable=line-too-long
+                                                 token=cluster_token, verify=ssl_verify,
+                                                 cert=ssl_certificate)
 
             _set_prometheus_targets(prometheus_url)
 
             self.set_multi_cluster_config(fsid, username, url, cluster_alias,
-                                          cluster_token, prometheus_url,
-                                          ssl_verify, ssl_certificate)
+                                          cluster_token, prometheus_url, ssl_verify,
+                                          ssl_certificate, prometheus_access_info)
             return True
-
         return False
 
     def get_cors_endpoints_string(self, hub_url):
@@ -188,7 +192,8 @@ class MultiCluster(RESTController):
         return cluster_token
 
     def set_multi_cluster_config(self, fsid, username, url, cluster_alias, token,
-                                 prometheus_url=None, ssl_verify=False, ssl_certificate=None):
+                                 prometheus_url=None, ssl_verify=False, ssl_certificate=None,
+                                 prometheus_access_info=None):
         multi_cluster_config = self.load_multi_cluster_config()
         if fsid in multi_cluster_config['config']:
             existing_entries = multi_cluster_config['config'][fsid]
@@ -201,7 +206,8 @@ class MultiCluster(RESTController):
                     "token": token,
                     "prometheus_url": prometheus_url if prometheus_url else '',
                     "ssl_verify": ssl_verify,
-                    "ssl_certificate": ssl_certificate if ssl_certificate else ''
+                    "ssl_certificate": ssl_certificate if ssl_certificate else '',
+                    "prometheus_access_info": prometheus_access_info
                 })
         else:
             multi_cluster_config['current_user'] = username
@@ -213,7 +219,8 @@ class MultiCluster(RESTController):
                 "token": token,
                 "prometheus_url": prometheus_url if prometheus_url else '',
                 "ssl_verify": ssl_verify,
-                "ssl_certificate": ssl_certificate if ssl_certificate else ''
+                "ssl_certificate": ssl_certificate if ssl_certificate else '',
+                "prometheus_access_info": prometheus_access_info
             }]
         Settings.MULTICLUSTER_CONFIG = multi_cluster_config
 
@@ -401,6 +408,37 @@ class MultiClusterUi(RESTController):
     @UpdatePermission
     def set_cors_endpoint(self, url: str):
         configure_cors(url)
+    
+    @Endpoint('GET')
+    @ReadPermission
+    def get_prometheus_access_info(self):
+        user = ''
+        password = ''
+        prometheus_cert = ''
+        orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator')
+        if orch_backend == 'cephadm':
+            cmd = {
+                'prefix': 'orch prometheus get-credentials',
+            }
+            ret, out, _ = mgr.mon_command(cmd)
+            if ret == 0 and out is not None:
+                access_info = json.loads(out)
+                user = access_info['user']
+                password = access_info['password']
+
+            cert_cmd = {
+                'prefix': 'orch prometheus get-prometheus-cert',
+            }
+            ret, out, _ = mgr.mon_command(cert_cmd)
+            if ret == 0 and out is not None:
+                cert = json.loads(out)
+                prometheus_cert = cert
+
+            return {
+                'user': user,
+                'password': password,
+                'certificate': prometheus_cert
+            }
 
 
 def _set_prometheus_targets(prometheus_url: str):
index c33f38cfdd470f255e33d37f500a6db53971240d..7839e1e8386e7ffc7ad0ef8fe9b95d0d11566edf 100644 (file)
@@ -791,6 +791,10 @@ class Orchestrator(object):
         """set alertmanager access information"""
         raise NotImplementedError()
 
+    def get_prometheus_cert(self, url: str) -> OrchResult[str]:
+        """get the prometheus certificate for multi-cluster"""
+        # NOTE(review): the `orch prometheus get-prometheus-cert` CLI handler in
+        # this change calls get_prometheus_cert() with no arguments -- confirm
+        # whether the `url` parameter should exist here.
+        raise NotImplementedError()
+
     def set_prometheus_access_info(self, user: str, password: str) -> OrchResult[str]:
         """set prometheus access information"""
         raise NotImplementedError()
index d0f3286177ce5fba87487e57d123817fc8c85e2e..d3ef1e59817abfb6aded62cab0c3c8b71ef690e1 100644 (file)
@@ -1232,6 +1232,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
         completion = self.set_prometheus_target(url)
         result = raise_if_exception(completion)
         return HandleCommandResult(stdout=json.dumps(result))
+    
+    @_cli_write_command('orch prometheus get-prometheus-cert')
+    def _get_prometheus_cert(self) -> HandleCommandResult:
+        completion = self.get_prometheus_cert()
+        result = raise_if_exception(completion)
+        return HandleCommandResult(stdout=json.dumps(result))
 
     @_cli_write_command('orch prometheus remove-target')
     def _remove_prometheus_target(self, url: str) -> HandleCommandResult: