git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: enable SNI and proper upstream cert verification
author: Redouane Kachach <rkachach@ibm.com>
Thu, 9 Oct 2025 10:12:21 +0000 (12:12 +0200)
committer: Redouane Kachach <rkachach@ibm.com>
Thu, 9 Oct 2025 10:14:21 +0000 (12:14 +0200)
The mgmt-gateway’s NGINX configuration did not enable SNI or
hostname-based verification when proxying HTTPS requests to
backend services (such as Prometheus, Grafana or Alertmanager).

By default, NGINX does not send the Server Name Indication (SNI)
during the upstream TLS handshake and uses the upstream group name
(e.g. prometheus_servers) as the certificate verification target.
As a result, certificate validation could fail, or the backend could
present the wrong certificate, when multiple upstreams are defined.

Fixes: https://tracker.ceph.com/issues/73368
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/pybind/mgr/cephadm/services/mgmt_gateway.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2
src/pybind/mgr/cephadm/tests/test_services.py

index eb73878aeaf2c3916d1a5c9a1b93f57ce38f12bf..e4ceff8b27c825c583ebd22a63faff74ad7e06e9 100644 (file)
@@ -33,7 +33,7 @@ class MgmtGatewayService(CephadmService):
         srv_entries = []
         for dd in self.mgr.cache.get_daemons_by_service(service_name):
             assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.hostname or (dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname))
             port = dd.ports[0] if dd.ports else None
             srv_entries.append(f'{addr}:{port}')
         return srv_entries
@@ -59,7 +59,7 @@ class MgmtGatewayService(CephadmService):
         sd_endpoints = []
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
             assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.hostname or (dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname))
             sd_endpoints.append(f"{addr}:{self.mgr.service_discovery_port}")
         return sd_endpoints
 
index 3dc54f667a93c35f2602cdb1b6360adcaa8b0bab..aa0bdb50df336dc5ae7b6575f190b2cf255c7eed 100644 (file)
@@ -9,7 +9,6 @@ import requests
 from mgr_module import HandleCommandResult
 from .service_registry import register_cephadm_service
 from cephadm.services.service_registry import service_registry
-from cephadm.tlsobject_types import TLSCredentials
 
 from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
@@ -144,22 +143,17 @@ class GrafanaService(CephadmService):
 
         return ''
 
-    def get_grafana_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials:
-        host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)]
-        host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'grafana_servers']
-        return self.get_certificates(daemon_spec, host_ips, host_fqdns)
-
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
 
-        tls_pair = self.get_grafana_certificates(daemon_spec)
-        if not tls_pair.cert or not tls_pair.key:
+        tls_creds = self.get_certificates(daemon_spec)
+        if not tls_creds.cert or not tls_creds.key:
             # this will lead to an error in the daemon as certificates are needed
             logger.error(f'Cannot generate the needed certificates to deploy Grafana on {daemon_spec.host}')
 
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
         grafana_ini = self.generate_grafana_ini(daemon_spec, mgmt_gw_enabled, oauth2_enabled)
-        grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, tls_pair.cert, tls_pair.key)
+        grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, tls_creds.cert, tls_creds.key)
         # the path of the grafana dashboards are assumed from the providers.yml.j2 file by grafana
         grafana_dashboards_path = self.mgr.grafana_dashboards_path or '/etc/grafana/dashboards/ceph-dashboard/'
 
@@ -172,8 +166,8 @@ class GrafanaService(CephadmService):
             'files': {
                 "grafana.ini": grafana_ini,
                 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
-                'certs/cert_file': '# generated by cephadm\n%s' % tls_pair.cert,
-                'certs/cert_key': '# generated by cephadm\n%s' % tls_pair.key,
+                'certs/cert_file': '# generated by cephadm\n%s' % tls_creds.cert,
+                'certs/cert_key': '# generated by cephadm\n%s' % tls_creds.key,
                 'provisioning/dashboards/default.yml': self.mgr.template.render(
                     'services/grafana/providers.yml.j2', {
                         'grafana_dashboards_path': grafana_dashboards_path
@@ -286,11 +280,6 @@ class AlertmanagerService(CephadmService):
     def needs_monitoring(self) -> bool:
         return True
 
-    def get_alertmanager_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials:
-        host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)]
-        host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'alertmanager_servers']
-        return self.get_certificates(daemon_spec, host_ips, host_fqdns)
-
     @classmethod
     def get_dependencies(cls, mgr: "CephadmOrchestrator",
                          spec: Optional[ServiceSpec] = None,
@@ -367,7 +356,7 @@ class AlertmanagerService(CephadmService):
         deps = self.get_dependencies(self.mgr)
         if security_enabled:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
-            tls_pair = self.get_alertmanager_certificates(daemon_spec)
+            tls_creds = self.get_certificates(daemon_spec)
             context = {
                 'enable_mtls': mgmt_gw_enabled,
                 'enable_basic_auth': not oauth2_enabled,
@@ -377,8 +366,8 @@ class AlertmanagerService(CephadmService):
             return {
                 "files": {
                     "alertmanager.yml": yml,
-                    'alertmanager.crt': tls_pair.cert,
-                    'alertmanager.key': tls_pair.key,
+                    'alertmanager.crt': tls_creds.cert,
+                    'alertmanager.key': tls_creds.key,
                     'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context),
                     'root_cert.pem': self.mgr.cert_mgr.get_root_ca()
                 },
@@ -492,11 +481,6 @@ class PrometheusService(CephadmService):
             # we shouldn't get here (mon will tell the mgr to respawn), but no
             # harm done if we do.
 
-    def get_prometheus_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials:
-        host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)]
-        host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'prometheus_servers']
-        return self.get_certificates(daemon_spec, host_ips, host_fqdns)
-
     def get_service_discovery_cfg(self, security_enabled: bool, mgmt_gw_enabled: bool) -> Dict[str, List[str]]:
         """
         Retrieves the service discovery URLs for the services that require monitoring
@@ -620,12 +604,12 @@ class PrometheusService(CephadmService):
                 'prometheus_web_user': prometheus_user,
                 'prometheus_web_password': password_hash(prometheus_password),
             }
-            tls_pair = self.get_prometheus_certificates(daemon_spec)
+            tls_creds = self.get_certificates(daemon_spec)
             files.update({
                 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(),
                 'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context),
-                'prometheus.crt': tls_pair.cert,
-                'prometheus.key': tls_pair.key,
+                'prometheus.crt': tls_creds.cert,
+                'prometheus.key': tls_creds.key,
                 **cluster_credentials_files['files']
             })
             r.update({'web_config': '/etc/prometheus/web.yml'})
@@ -783,14 +767,14 @@ class NodeExporterService(CephadmService):
         deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
         security_enabled, mgmt_gw_enabled, _ = self.mgr._get_security_config()
         if security_enabled:
-            tls_pair = self.get_certificates(daemon_spec)
+            tls_creds = self.get_certificates(daemon_spec)
             r = {
                 'files': {
                     'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2',
                                                         {'enable_mtls': mgmt_gw_enabled}),
                     'root_cert.pem': self.mgr.cert_mgr.get_root_ca(),
-                    'node_exporter.crt': tls_pair.cert,
-                    'node_exporter.key': tls_pair.key,
+                    'node_exporter.crt': tls_creds.cert,
+                    'node_exporter.key': tls_creds.key,
                 },
                 'web_config': '/etc/node-exporter/web.yml'
             }
index 3db1a1142b35d9adc10d2c00351d7ca3e298a834..eb5a7057fa40f7e248eb7ff39f8f64a8c6b85f4d 100644 (file)
@@ -78,6 +78,8 @@ server {
     location / {
         proxy_pass {{ dashboard_scheme }}://dashboard_servers;
         proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
         {% if enable_oauth2_proxy %}
         auth_request /oauth2/auth;
         error_page 401 = /oauth2/sign_in;
@@ -120,6 +122,8 @@ server {
         # will send this header if Grafana is running on the same node as one of those services
         proxy_set_header Authorization "";
         proxy_buffering off;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
         {% if enable_oauth2_proxy %}
         auth_request /oauth2/auth;
         error_page 401 = /oauth2/sign_in;
@@ -155,7 +159,9 @@ server {
         proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
         proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
         proxy_ssl_verify on;
-        proxy_ssl_verify_depth 2;
+        proxy_ssl_verify_depth 1;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
         {% if enable_oauth2_proxy %}
         auth_request /oauth2/auth;
         error_page 401 = /oauth2/sign_in;
@@ -179,7 +185,9 @@ server {
         proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
         proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
         proxy_ssl_verify on;
-        proxy_ssl_verify_depth 2;
+        proxy_ssl_verify_depth 1;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
         {% if enable_oauth2_proxy %}
         auth_request /oauth2/auth;
         error_page 401 = /oauth2/sign_in;
index 2abb24b2eba64c02f291b8cc4f5d86bba7a0ffe9..521b48dbdb8c5d024e2e468f05674f84b07778a1 100644 (file)
@@ -23,6 +23,8 @@ server {
         rewrite ^/internal/(.*) /$1 break;
         proxy_pass https://service_discovery_servers;
         proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
     }
 {% endif %}
 
@@ -31,6 +33,8 @@ server {
         rewrite ^/internal/dashboard/(.*) /$1 break;
         proxy_pass {{ dashboard_scheme }}://dashboard_servers;
         proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
     }
 {% endif %}
 
@@ -38,6 +42,8 @@ server {
     location /internal/grafana {
         rewrite ^/internal/grafana/(.*) /$1 break;
         proxy_pass {{ grafana_scheme }}://grafana_servers;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
     }
 {% endif %}
 
@@ -50,7 +56,9 @@ server {
         proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
         proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
         proxy_ssl_verify on;
-        proxy_ssl_verify_depth 2;
+        proxy_ssl_verify_depth 1;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
     }
 {% endif %}
 
@@ -63,7 +71,9 @@ server {
         proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
         proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
         proxy_ssl_verify on;
-        proxy_ssl_verify_depth 2;
+        proxy_ssl_verify_depth 1;
+        proxy_ssl_server_name on;
+        proxy_ssl_name $ssl_server_name;
     }
 {% endif %}
 }
index c990129e769d08d4fd146d22699ea73e01132bf2..1b96ab392f781115149b634f47dc9d56c2e2988b 100644 (file)
@@ -4999,6 +4999,8 @@ class TestMgmtGateway:
                                                  location / {
                                                      proxy_pass https://dashboard_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /grafana {
@@ -5007,6 +5009,8 @@ class TestMgmtGateway:
                                                      # will send this header if Grafana is running on the same node as one of those services
                                                      proxy_set_header Authorization "";
                                                      proxy_buffering off;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /prometheus {
@@ -5016,7 +5020,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /alertmanager {
@@ -5026,7 +5032,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
                                              }"""),
                     "nginx_internal_server.conf": dedent("""
@@ -5047,17 +5055,23 @@ class TestMgmtGateway:
                                                      rewrite ^/internal/(.*) /$1 break;
                                                      proxy_pass https://service_discovery_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/dashboard {
                                                      rewrite ^/internal/dashboard/(.*) /$1 break;
                                                      proxy_pass https://dashboard_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/grafana {
                                                      rewrite ^/internal/grafana/(.*) /$1 break;
                                                      proxy_pass https://grafana_servers;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/prometheus {
@@ -5068,7 +5082,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/alertmanager {
@@ -5079,7 +5095,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
                                              }"""),
                     "nginx_internal.crt": f"{ceph_generated_cert}",
@@ -5271,6 +5289,8 @@ class TestMgmtGateway:
                                                  location / {
                                                      proxy_pass https://dashboard_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                      auth_request /oauth2/auth;
                                                      error_page 401 = /oauth2/sign_in;
 
@@ -5309,6 +5329,8 @@ class TestMgmtGateway:
                                                      # will send this header if Grafana is running on the same node as one of those services
                                                      proxy_set_header Authorization "";
                                                      proxy_buffering off;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                      auth_request /oauth2/auth;
                                                      error_page 401 = /oauth2/sign_in;
 
@@ -5340,7 +5362,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                      auth_request /oauth2/auth;
                                                      error_page 401 = /oauth2/sign_in;
 
@@ -5360,7 +5384,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                      auth_request /oauth2/auth;
                                                      error_page 401 = /oauth2/sign_in;
 
@@ -5391,17 +5417,23 @@ class TestMgmtGateway:
                                                      rewrite ^/internal/(.*) /$1 break;
                                                      proxy_pass https://service_discovery_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/dashboard {
                                                      rewrite ^/internal/dashboard/(.*) /$1 break;
                                                      proxy_pass https://dashboard_servers;
                                                      proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/grafana {
                                                      rewrite ^/internal/grafana/(.*) /$1 break;
                                                      proxy_pass https://grafana_servers;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/prometheus {
@@ -5412,7 +5444,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
 
                                                  location /internal/alertmanager {
@@ -5423,7 +5457,9 @@ class TestMgmtGateway:
                                                      proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
                                                      proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
                                                      proxy_ssl_verify on;
-                                                     proxy_ssl_verify_depth 2;
+                                                     proxy_ssl_verify_depth 1;
+                                                     proxy_ssl_server_name on;
+                                                     proxy_ssl_name $ssl_server_name;
                                                  }
                                              }"""),
                     "nginx_internal.crt": f"{ceph_generated_cert}",