From: Redouane Kachach Date: Thu, 9 Oct 2025 10:12:21 +0000 (+0200) Subject: mgr/cephadm: enable SNI and proper upstream cert verification X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=194a0693ac2313358364982e70cf9030138d8d8b;p=ceph-ci.git mgr/cephadm: enable SNI and proper upstream cert verification The mgmt-gateway’s NGINX configuration did not enable SNI or hostname-based verification when proxying HTTPS requests to backend services (such as Prometheus, Grafana or Alertmanager). By default, NGINX did not send the Server Name Indication (SNI) during the TLS handshake and used the upstream group name (e.g. prometheus_servers) as the certificate verification target. As a result, certificate validation could fail or use the wrong certificate when multiple upstreams are defined. Fixes: https://tracker.ceph.com/issues/73368 Signed-off-by: Redouane Kachach --- diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py index eb73878aeaf..e4ceff8b27c 100644 --- a/src/pybind/mgr/cephadm/services/mgmt_gateway.py +++ b/src/pybind/mgr/cephadm/services/mgmt_gateway.py @@ -33,7 +33,7 @@ class MgmtGatewayService(CephadmService): srv_entries = [] for dd in self.mgr.cache.get_daemons_by_service(service_name): assert dd.hostname is not None - addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + addr = dd.hostname or (dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)) port = dd.ports[0] if dd.ports else None srv_entries.append(f'{addr}:{port}') return srv_entries @@ -59,7 +59,7 @@ class MgmtGatewayService(CephadmService): sd_endpoints = [] for dd in self.mgr.cache.get_daemons_by_service('mgr'): assert dd.hostname is not None - addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + addr = dd.hostname or (dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)) sd_endpoints.append(f"{addr}:{self.mgr.service_discovery_port}") return sd_endpoints diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 3dc54f667a9..aa0bdb50df3 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -9,7 +9,6 @@ import requests from mgr_module import HandleCommandResult from .service_registry import register_cephadm_service from cephadm.services.service_registry import service_registry -from cephadm.tlsobject_types import TLSCredentials from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ @@ -144,22 +143,17 @@ class GrafanaService(CephadmService): return '' - def get_grafana_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials: - host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)] - host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'grafana_servers'] - return self.get_certificates(daemon_spec, host_ips, host_fqdns) - def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type - tls_pair = self.get_grafana_certificates(daemon_spec) - if not tls_pair.cert or not tls_pair.key: + tls_creds = self.get_certificates(daemon_spec) + if not tls_creds.cert or not tls_creds.key: # this will lead to an error in the daemon as certificates are needed logger.error(f'Cannot generate the needed certificates to deploy Grafana on {daemon_spec.host}') security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config() grafana_ini = self.generate_grafana_ini(daemon_spec, mgmt_gw_enabled, oauth2_enabled) - grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, tls_pair.cert, tls_pair.key) + grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, tls_creds.cert, tls_creds.key) # the path of the grafana dashboards are assumed from the providers.yml.j2 file by grafana grafana_dashboards_path = self.mgr.grafana_dashboards_path or '/etc/grafana/dashboards/ceph-dashboard/' @@ -172,8 +166,8 @@ class GrafanaService(CephadmService): 'files': { "grafana.ini": grafana_ini, 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources, - 'certs/cert_file': '# generated by cephadm\n%s' % tls_pair.cert, - 'certs/cert_key': '# generated by cephadm\n%s' % tls_pair.key, + 'certs/cert_file': '# generated by cephadm\n%s' % tls_creds.cert, + 'certs/cert_key': '# generated by cephadm\n%s' % tls_creds.key, 'provisioning/dashboards/default.yml': self.mgr.template.render( 'services/grafana/providers.yml.j2', { 'grafana_dashboards_path': grafana_dashboards_path @@ -286,11 +280,6 @@ class AlertmanagerService(CephadmService): def needs_monitoring(self) -> bool: return True - def get_alertmanager_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials: - host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)] - host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'alertmanager_servers'] - return self.get_certificates(daemon_spec, host_ips, host_fqdns) - @classmethod def get_dependencies(cls, mgr: "CephadmOrchestrator", spec: Optional[ServiceSpec] = None, @@ -367,7 +356,7 @@ class AlertmanagerService(CephadmService): deps = self.get_dependencies(self.mgr) if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() - tls_pair = self.get_alertmanager_certificates(daemon_spec) + tls_creds = self.get_certificates(daemon_spec) context = { 'enable_mtls': mgmt_gw_enabled, 'enable_basic_auth': not oauth2_enabled, @@ -377,8 +366,8 @@ class AlertmanagerService(CephadmService): return { "files": { "alertmanager.yml": yml, - 'alertmanager.crt': tls_pair.cert, - 'alertmanager.key': tls_pair.key, + 'alertmanager.crt': tls_creds.cert, + 'alertmanager.key': tls_creds.key, 'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context), 'root_cert.pem': self.mgr.cert_mgr.get_root_ca() }, @@ -492,11 +481,6 @@ class PrometheusService(CephadmService): # we shouldn't get here (mon will tell the mgr to respawn), but no # harm done if we do. - def get_prometheus_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> TLSCredentials: - host_ips = [self.mgr.inventory.get_addr(daemon_spec.host)] - host_fqdns = [self.mgr.get_fqdn(daemon_spec.host), 'prometheus_servers'] - return self.get_certificates(daemon_spec, host_ips, host_fqdns) - def get_service_discovery_cfg(self, security_enabled: bool, mgmt_gw_enabled: bool) -> Dict[str, List[str]]: """ Retrieves the service discovery URLs for the services that require monitoring @@ -620,12 +604,12 @@ class PrometheusService(CephadmService): 'prometheus_web_user': prometheus_user, 'prometheus_web_password': password_hash(prometheus_password), } - tls_pair = self.get_prometheus_certificates(daemon_spec) + tls_creds = self.get_certificates(daemon_spec) files.update({ 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(), 'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context), - 'prometheus.crt': tls_pair.cert, - 'prometheus.key': tls_pair.key, + 'prometheus.crt': tls_creds.cert, + 'prometheus.key': tls_creds.key, **cluster_credentials_files['files'] }) r.update({'web_config': '/etc/prometheus/web.yml'}) @@ -783,14 +767,14 @@ class NodeExporterService(CephadmService): deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] security_enabled, mgmt_gw_enabled, _ = self.mgr._get_security_config() if security_enabled: - tls_pair = self.get_certificates(daemon_spec) + tls_creds = self.get_certificates(daemon_spec) r = { 'files': { 'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {'enable_mtls': mgmt_gw_enabled}), 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(), - 'node_exporter.crt': tls_pair.cert, - 'node_exporter.key': tls_pair.key, + 'node_exporter.crt': tls_creds.cert, + 'node_exporter.key': tls_creds.key, }, 'web_config': '/etc/node-exporter/web.yml' } diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 index 3db1a1142b3..eb5a7057fa4 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 @@ -78,6 +78,8 @@ server { location / { proxy_pass {{ dashboard_scheme }}://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -120,6 +122,8 @@ server { # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; proxy_buffering off; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -155,7 +159,9 @@ server { proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -179,7 +185,9 @@ server { proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 index 2abb24b2eba..521b48dbdb8 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 @@ -23,6 +23,8 @@ server { rewrite ^/internal/(.*) /$1 break; proxy_pass https://service_discovery_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } {% endif %} @@ -31,6 +33,8 @@ server { rewrite ^/internal/dashboard/(.*) /$1 break; proxy_pass {{ dashboard_scheme }}://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } {% endif %} @@ -38,6 +42,8 @@ server { location /internal/grafana { rewrite ^/internal/grafana/(.*) /$1 break; proxy_pass {{ grafana_scheme }}://grafana_servers; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } {% endif %} @@ -50,7 +56,9 @@ server { proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } {% endif %} @@ -63,7 +71,9 @@ server { proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } {% endif %} } diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index c990129e769..1b96ab392f7 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -4999,6 +4999,8 @@ class TestMgmtGateway: location / { proxy_pass https://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /grafana { @@ -5007,6 +5009,8 @@ class TestMgmtGateway: # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; proxy_buffering off; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /prometheus { @@ -5016,7 +5020,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /alertmanager { @@ -5026,7 +5032,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } }"""), "nginx_internal_server.conf": dedent(""" @@ -5047,17 +5055,23 @@ class TestMgmtGateway: rewrite ^/internal/(.*) /$1 break; proxy_pass https://service_discovery_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/dashboard { rewrite ^/internal/dashboard/(.*) /$1 break; proxy_pass https://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/grafana { rewrite ^/internal/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/prometheus { @@ -5068,7 +5082,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/alertmanager { @@ -5079,7 +5095,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } }"""), "nginx_internal.crt": f"{ceph_generated_cert}", @@ -5271,6 +5289,8 @@ class TestMgmtGateway: location / { proxy_pass https://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -5309,6 +5329,8 @@ class TestMgmtGateway: # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; proxy_buffering off; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -5340,7 +5362,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -5360,7 +5384,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -5391,17 +5417,23 @@ class TestMgmtGateway: rewrite ^/internal/(.*) /$1 break; proxy_pass https://service_discovery_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/dashboard { rewrite ^/internal/dashboard/(.*) /$1 break; proxy_pass https://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/grafana { rewrite ^/internal/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/prometheus { @@ -5412,7 +5444,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } location /internal/alertmanager { @@ -5423,7 +5457,9 @@ class TestMgmtGateway: proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; - proxy_ssl_verify_depth 2; + proxy_ssl_verify_depth 1; + proxy_ssl_server_name on; + proxy_ssl_name $ssl_server_name; } }"""), "nginx_internal.crt": f"{ceph_generated_cert}",