From: Redouane Kachach Date: Thu, 17 Oct 2024 08:42:27 +0000 (+0200) Subject: mgr/cephadm: adding HA support for mgmt-gateway and oauth2-proxy X-Git-Tag: v20.0.0~766^2~3 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=0392505386e8f379d0ae7a2bc9f81cbd5e4e07eb;p=ceph.git mgr/cephadm: adding HA support for mgmt-gateway and oauth2-proxy Adding HA support for mgmt-gateway and oauth2-proxy. In addition, logic is added to prevent unnecessary daemon restarts during mgr failover. Previously, without the management gateway, some daemons, such as Prometheus, had an explicit dependency on the manager because we needed to point to the active manager. With the management gateway, this explicit dependency is no longer necessary, as it automatically handles routing requests to the active manager. Signed-off-by: Redouane Kachach --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 3a112d6a9574a..263bff19f5a79 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -819,30 +819,33 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, security_enabled = self.secure_monitoring_stack or mgmt_gw_enabled return security_enabled, mgmt_gw_enabled, oauth2_proxy_enabled - def get_mgmt_gw_internal_endpoint(self) -> Optional[str]: + def _get_mgmt_gw_endpoint(self, is_internal: bool) -> Optional[str]: mgmt_gw_daemons = self.cache.get_daemons_by_service('mgmt-gateway') if not mgmt_gw_daemons: return None dd = mgmt_gw_daemons[0] assert dd.hostname is not None - mgmt_gw_addr = self.get_fqdn(dd.hostname) - mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) - return f'{mgmt_gw_internal_endpoint}/internal' + mgmt_gw_spec = cast(MgmtGatewaySpec, self.spec_store['mgmt-gateway'].spec) + mgmt_gw_addr = mgmt_gw_spec.virtual_ip if mgmt_gw_spec.virtual_ip is not None else self.get_fqdn(dd.hostname) - def get_mgmt_gw_external_endpoint(self) 
-> Optional[str]: - mgmt_gw_daemons = self.cache.get_daemons_by_service('mgmt-gateway') - if not mgmt_gw_daemons: - return None + if is_internal: + mgmt_gw_port: Optional[int] = MgmtGatewayService.INTERNAL_SERVICE_PORT + protocol = 'https' + endpoint_suffix = '/internal' + else: + mgmt_gw_port = dd.ports[0] if dd.ports else None + protocol = 'http' if mgmt_gw_spec.disable_https else 'https' + endpoint_suffix = '' - dd = mgmt_gw_daemons[0] - assert dd.hostname is not None - mgmt_gw_port = dd.ports[0] if dd.ports else None - mgmt_gw_addr = self.get_fqdn(dd.hostname) - mgmt_gw_spec = cast(MgmtGatewaySpec, self.spec_store['mgmt-gateway'].spec) - protocol = 'http' if mgmt_gw_spec.disable_https else 'https' - mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port) - return mgmt_gw_external_endpoint + mgmt_gw_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port) + return f'{mgmt_gw_endpoint}{endpoint_suffix}' + + def get_mgmt_gw_internal_endpoint(self) -> Optional[str]: + return self._get_mgmt_gw_endpoint(is_internal=True) + + def get_mgmt_gw_external_endpoint(self) -> Optional[str]: + return self._get_mgmt_gw_endpoint(is_internal=False) def _get_cephadm_binary_path(self) -> str: import hashlib @@ -3001,8 +3004,16 @@ Then run the following: daemon_names.append(dd.name()) return daemon_names - alertmanager_user, alertmanager_password = self._get_alertmanager_credentials() - prometheus_user, prometheus_password = self._get_prometheus_credentials() + prom_cred_hash = None + alertmgr_cred_hash = None + security_enabled, mgmt_gw_enabled, _ = self._get_security_config() + if security_enabled: + alertmanager_user, alertmanager_password = self._get_alertmanager_credentials() + prometheus_user, prometheus_password = self._get_prometheus_credentials() + if prometheus_user and prometheus_password: + prom_cred_hash = f'{utils.md5_hash(prometheus_user + prometheus_password)}' + if alertmanager_user and alertmanager_password: + 
alertmgr_cred_hash = f'{utils.md5_hash(alertmanager_user + alertmanager_password)}' deps = [] if daemon_type == 'haproxy': @@ -3049,9 +3060,10 @@ Then run the following: else: deps = [self.get_mgr_ip()] elif daemon_type == 'prometheus': - # for prometheus we add the active mgr as an explicit dependency, - # this way we force a redeploy after a mgr failover - deps.append(self.get_active_mgr().name()) + if not mgmt_gw_enabled: + # for prometheus we add the active mgr as an explicit dependency, + # this way we force a redeploy after a mgr failover + deps.append(self.get_active_mgr().name()) deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283))) deps.append(str(self.service_discovery_port)) # prometheus yaml configuration file (generated by prometheus.yml.j2) contains @@ -3068,22 +3080,20 @@ Then run the following: deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')] deps += [d.name() for d in self.cache.get_daemons_by_service('oauth2-proxy')] - security_enabled, _, _ = self._get_security_config() - if security_enabled: - if prometheus_user and prometheus_password: - deps.append(f'{hash(prometheus_user + prometheus_password)}') - if alertmanager_user and alertmanager_password: - deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + if prom_cred_hash is not None: + deps.append(prom_cred_hash) + if alertmgr_cred_hash is not None: + deps.append(alertmgr_cred_hash) elif daemon_type == 'grafana': deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway', 'oauth2-proxy']) - security_enabled, _, _ = self._get_security_config() - if security_enabled and prometheus_user and prometheus_password: - deps.append(f'{hash(prometheus_user + prometheus_password)}') + if prom_cred_hash is not None: + deps.append(prom_cred_hash) elif daemon_type == 'alertmanager': - deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 
'mgmt-gateway', 'oauth2-proxy']) - security_enabled, _, _ = self._get_security_config() - if security_enabled and alertmanager_user and alertmanager_password: - deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + deps += get_daemon_names(['alertmanager', 'snmp-gateway', 'mgmt-gateway', 'oauth2-proxy']) + if not mgmt_gw_enabled: + deps += get_daemon_names(['mgr']) + if alertmgr_cred_hash is not None: + deps.append(alertmgr_cred_hash) elif daemon_type == 'promtail': deps += get_daemon_names(['loki']) elif daemon_type in ['ceph-exporter', 'node-exporter']: @@ -3097,7 +3107,7 @@ Then run the following: elif daemon_type == 'mgmt-gateway': # url_prefix for monitoring daemons depends on the presence of mgmt-gateway # while dashboard urls depend on the mgr daemons - deps += get_daemon_names(['mgr', 'grafana', 'prometheus', 'alertmanager', 'oauth2-proxy']) + deps += get_daemon_names(['grafana', 'prometheus', 'alertmanager', 'oauth2-proxy']) else: # this daemon type doesn't need deps mgmt pass diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py index a17000cd6327d..7381ef67d7e06 100644 --- a/src/pybind/mgr/cephadm/services/ingress.py +++ b/src/pybind/mgr/cephadm/services/ingress.py @@ -241,7 +241,12 @@ class IngressService(CephService): if spec.keepalived_password: password = spec.keepalived_password - daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) + if spec.keepalive_only: + # when keepalive_only instead of haproxy, we have to monitor the backend service daemons + if spec.backend_service is not None: + daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service) + else: + daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) if not daemons and not spec.keepalive_only: raise OrchestratorError( @@ -297,6 +302,10 @@ class IngressService(CephService): port = d.ports[1] # monitoring port host_ip = d.ip or self.mgr.inventory.get_addr(d.hostname) script = f'/usr/bin/curl 
{build_url(scheme="http", host=host_ip, port=port)}/health' + elif d.daemon_type == 'mgmt-gateway': + mgmt_gw_port = d.ports[0] if d.ports else None + host_ip = d.ip or self.mgr.inventory.get_addr(d.hostname) + script = f'/usr/bin/curl -k {build_url(scheme="https", host=host_ip, port=mgmt_gw_port)}/health' assert script states = [] diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py index 1943264025e29..59b9ed6b14938 100644 --- a/src/pybind/mgr/cephadm/services/mgmt_gateway.py +++ b/src/pybind/mgr/cephadm/services/mgmt_gateway.py @@ -1,5 +1,5 @@ import logging -from typing import List, Any, Tuple, Dict, cast, Optional +from typing import List, Any, Tuple, Dict, cast, TYPE_CHECKING from orchestrator import DaemonDescription from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec @@ -36,10 +36,11 @@ class MgmtGatewayService(CephadmService): # if empty list provided, return empty Daemon Desc return DaemonDescription() - def get_oauth2_service_url(self) -> Optional[str]: - # TODO(redo): check how can we create several servers for HA - oauth2_servers = self.get_service_endpoints('oauth2-proxy') - return f'https://{oauth2_servers[0]}' if oauth2_servers else None + def get_mgmt_gw_ips(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> List[str]: + mgmt_gw_ips = [self.mgr.inventory.get_addr(daemon_spec.host)] + if svc_spec.virtual_ip is not None: + mgmt_gw_ips.append(svc_spec.virtual_ip) + return mgmt_gw_ips def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: # we adjust the standby behaviour so rev-proxy can pick correctly the active instance @@ -56,9 +57,9 @@ class MgmtGatewayService(CephadmService): key = svc_spec.ssl_certificate_key else: # not provided on the spec, let's generate self-sigend certificates - addr = self.mgr.inventory.get_addr(daemon_spec.host) + ips = self.get_mgmt_gw_ips(svc_spec, daemon_spec) host_fqdn = 
self.mgr.get_fqdn(daemon_spec.host) - cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, addr) + cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, ips) # save certificates if cert and key: self.mgr.cert_key_store.save_cert('mgmt_gw_cert', cert) @@ -67,10 +68,18 @@ class MgmtGatewayService(CephadmService): logger.error("Failed to obtain certificate and key from mgmt-gateway.") return cert, key - def get_internal_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: - node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + def get_internal_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: + ips = self.get_mgmt_gw_ips(svc_spec, daemon_spec) host_fqdn = self.mgr.get_fqdn(daemon_spec.host) - return self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) + return self.mgr.cert_mgr.generate_cert(host_fqdn, ips) + + def get_service_discovery_endpoints(self) -> List[str]: + sd_endpoints = [] + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + sd_endpoints.append(f"{addr}:{self.mgr.service_discovery_port}") + return sd_endpoints def get_mgmt_gateway_deps(self) -> List[str]: # url_prefix for the following services depends on the presence of mgmt-gateway @@ -79,10 +88,6 @@ class MgmtGatewayService(CephadmService): deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')] deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')] deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('oauth2-proxy')] - for dd in self.mgr.cache.get_daemons_by_service('mgr'): - # we consider mgr a dep even if the dashboard is disabled - # in order to be consistent with _calc_daemon_deps(). 
- deps.append(dd.name()) return deps @@ -94,6 +99,8 @@ class MgmtGatewayService(CephadmService): prometheus_endpoints = self.get_service_endpoints('prometheus') alertmanager_endpoints = self.get_service_endpoints('alertmanager') grafana_endpoints = self.get_service_endpoints('grafana') + oauth2_proxy_endpoints = self.get_service_endpoints('oauth2-proxy') + service_discovery_endpoints = self.get_service_discovery_endpoints() try: grafana_spec = cast(GrafanaSpec, self.mgr.spec_store['grafana'].spec) grafana_protocol = grafana_spec.protocol @@ -104,7 +111,9 @@ class MgmtGatewayService(CephadmService): 'dashboard_endpoints': dashboard_endpoints, 'prometheus_endpoints': prometheus_endpoints, 'alertmanager_endpoints': alertmanager_endpoints, - 'grafana_endpoints': grafana_endpoints + 'grafana_endpoints': grafana_endpoints, + 'oauth2_proxy_endpoints': oauth2_proxy_endpoints, + 'service_discovery_endpoints': service_discovery_endpoints } server_context = { 'spec': svc_spec, @@ -117,11 +126,12 @@ class MgmtGatewayService(CephadmService): 'prometheus_endpoints': prometheus_endpoints, 'alertmanager_endpoints': alertmanager_endpoints, 'grafana_endpoints': grafana_endpoints, - 'oauth2_proxy_url': self.get_oauth2_service_url(), + 'service_discovery_endpoints': service_discovery_endpoints, + 'enable_oauth2_proxy': bool(oauth2_proxy_endpoints), } cert, key = self.get_external_certificates(svc_spec, daemon_spec) - internal_cert, internal_pkey = self.get_internal_certificates(daemon_spec) + internal_cert, internal_pkey = self.get_internal_certificates(svc_spec, daemon_spec) daemon_config = { "files": { "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context), diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 6a57e3b31ef63..1b9cf6185708e 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -8,10 +8,11 @@ from mgr_module import 
HandleCommandResult from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ - SNMPGatewaySpec, PrometheusSpec + SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issuer_info, password_hash from ceph.deployment.utils import wrap_ipv6 +from .. import utils logger = logging.getLogger(__name__) @@ -57,15 +58,17 @@ class GrafanaService(CephadmService): daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to - mgmt_gw_ip = None domain = self.mgr.get_fqdn(daemon_spec.host) + mgmt_gw_ips = [] if mgmt_gw_enabled: mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') if mgmt_gw_daemons: dd = mgmt_gw_daemons[0] assert dd.hostname - domain = self.mgr.get_fqdn(dd.hostname) - mgmt_gw_ip = self.mgr.inventory.get_addr(dd.hostname) + mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec) + # TODO(redo): should we resolve the virtual_ip to a name if possible? 
+ domain = mgmt_gw_spec.virtual_ip or self.mgr.get_fqdn(dd.hostname) # give prio to VIP if configured + mgmt_gw_ips = [self.mgr.inventory.get_addr(dd.hostname) for dd in mgmt_gw_daemons] # type: ignore return self.mgr.template.render('services/grafana/grafana.ini.j2', { 'anonymous_access': spec.anonymous_access, @@ -76,7 +79,7 @@ class GrafanaService(CephadmService): 'domain': domain, 'mgmt_gw_enabled': mgmt_gw_enabled, 'oauth2_enabled': oauth2_enabled, - 'mgmt_gw_ip': mgmt_gw_ip, + 'mgmt_gw_ips': ','.join(mgmt_gw_ips), }) def calculate_grafana_deps(self, security_enabled: bool) -> List[str]: @@ -87,7 +90,7 @@ class GrafanaService(CephadmService): # in case security is enabled we have to reconfig when prom user/pass changes prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() if security_enabled and prometheus_user and prometheus_password: - deps.append(f'{hash(prometheus_user + prometheus_password)}') + deps.append(f'{utils.md5_hash(prometheus_user + prometheus_password)}') # adding a dependency for mgmt-gateway because the usage of url_prefix relies on its presence. # another dependency is added for oauth-proxy as Grafana login is delegated to this service when enabled. @@ -311,17 +314,18 @@ class AlertmanagerService(CephadmService): # add a dependency since enbling basic-auth (or not) depends on the existence of 'oauth2-proxy' deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('oauth2-proxy')] - # scan all mgrs to generate deps and to get standbys too. - for dd in self.mgr.cache.get_daemons_by_service('mgr'): - # we consider mgr a dep even if the dashboard is disabled - # in order to be consistent with _calc_daemon_deps(). 
- deps.append(dd.name()) - security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config() if mgmt_gw_enabled: dashboard_urls = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}/dashboard'] else: dashboard_urls = get_dashboard_urls(self) + # scan all mgrs to generate deps and to get standbys too. + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider mgr a dep even if the dashboard is disabled + # in order to be consistent with _calc_daemon_deps(). + # when mgmt_gw is enabled there's no need for mgr dep as + # mgmt-gw wil route to the active mgr automatically + deps.append(dd.name()) snmp_gateway_urls: List[str] = [] for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): @@ -354,7 +358,7 @@ class AlertmanagerService(CephadmService): if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() if alertmanager_user and alertmanager_password: - deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + deps.append(f'{utils.md5_hash(alertmanager_user + alertmanager_password)}') cert, key = self.get_alertmanager_certificates(daemon_spec) context = { 'enable_mtls': mgmt_gw_enabled, @@ -489,8 +493,14 @@ class PrometheusService(CephadmService): security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config() port = self.mgr.service_discovery_port mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip()) + protocol = 'https' if security_enabled else 'http' - srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?' + self.mgr.get_mgmt_gw_internal_endpoint() + if mgmt_gw_enabled: + service_discovery_url_prefix = f'{self.mgr.get_mgmt_gw_internal_endpoint()}' + else: + service_discovery_url_prefix = f'{protocol}://{mgr_addr}:{port}' + srv_end_point = f'{service_discovery_url_prefix}/sd/prometheus/sd-config?' 
node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter')) alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager')) @@ -617,18 +627,23 @@ class PrometheusService(CephadmService): port = cast(int, self.mgr.get_module_option_ex('prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT)) deps.append(str(port)) deps.append(str(self.mgr.service_discovery_port)) - # add an explicit dependency on the active manager. This will force to - # re-deploy prometheus if the mgr has changed (due to a fail-over i.e). - deps.append(self.mgr.get_active_mgr().name()) deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') - security_enabled, _, _ = self.mgr._get_security_config() + security_enabled, mgmt_gw_enabled, _ = self.mgr._get_security_config() + + if not mgmt_gw_enabled: + # add an explicit dependency on the active manager. This will force to + # re-deploy prometheus if the mgr has changed (due to a fail-over i.e). + # when mgmt_gw is enabled there's no need for such dep as mgmt-gw wil + # route to the active mgr automatically + deps.append(self.mgr.get_active_mgr().name()) + if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() if prometheus_user and prometheus_password: - deps.append(f'{hash(prometheus_user + prometheus_password)}') + deps.append(f'{utils.md5_hash(prometheus_user + prometheus_password)}') if alertmanager_user and alertmanager_password: - deps.append(f'{hash(alertmanager_user + alertmanager_password)}') + deps.append(f'{utils.md5_hash(alertmanager_user + alertmanager_password)}') # add a dependency since url_prefix depends on the existence of mgmt-gateway deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 
b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 index 967f1355af14b..c767baddbb7ac 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -39,7 +39,7 @@ header_property = username auto_sign_up = true sync_ttl = 15 - whitelist = {{ mgmt_gw_ip }} + whitelist = {{ mgmt_gw_ips }} headers_encoded = false enable_login_token = false headers = Role:X-WEBAUTH-ROLE diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 index 594582e7ee4c9..50a61f843d12b 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 @@ -46,15 +46,15 @@ server { # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';"; {% endif %} -{% if spec.enable_health_check_endpoint %} +{% if spec.enable_health_check_endpoint or spec.virtual_ip %} location /health { return 200 'OK'; add_header Content-Type text/plain; } {% endif %} -{% if oauth2_proxy_url %} +{% if enable_oauth2_proxy %} location /oauth2/ { - proxy_pass {{ oauth2_proxy_url }}; + proxy_pass https://oauth2_proxy_servers; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Scheme $scheme; @@ -64,7 +64,7 @@ server { location = /oauth2/auth { internal; - proxy_pass {{ oauth2_proxy_url }}; + proxy_pass https://oauth2_proxy_servers; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Scheme $scheme; @@ -78,7 +78,7 @@ server { location / { proxy_pass {{ dashboard_scheme }}://dashboard_servers; proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; - {% if oauth2_proxy_url %} + {% if 
enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -120,7 +120,7 @@ server { # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; proxy_buffering off; - {% if oauth2_proxy_url %} + {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -156,7 +156,7 @@ server { proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; proxy_ssl_verify_depth 2; - {% if oauth2_proxy_url %} + {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -180,7 +180,7 @@ server { proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; proxy_ssl_verify on; proxy_ssl_verify_depth 2; - {% if oauth2_proxy_url %} + {% if enable_oauth2_proxy %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 index 9148ddc4a142d..2abb24b2eba64 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 @@ -12,12 +12,20 @@ server { ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305; ssl_prefer_server_ciphers on; -{% if spec.enable_health_check_endpoint %} +{% if spec.enable_health_check_endpoint or spec.virtual_ip %} location /health { return 200 'OK'; add_header Content-Type text/plain; } {% endif %} +{% if service_discovery_endpoints %} + location /internal/sd { + rewrite ^/internal/(.*) /$1 break; + proxy_pass https://service_discovery_servers; + proxy_next_upstream error timeout invalid_header http_500 http_502 
http_503 http_504; + } +{% endif %} + {% if dashboard_endpoints %} location /internal/dashboard { rewrite ^/internal/dashboard/(.*) /$1 break; diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 index 0c2a6b98c3ba0..b9773ceeeb3c9 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 @@ -8,6 +8,7 @@ events { http { + #access_log /dev/stdout; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; @@ -16,6 +17,22 @@ http { proxy_headers_hash_max_size 1024; proxy_headers_hash_bucket_size 128; +{% if oauth2_proxy_endpoints %} + upstream oauth2_proxy_servers { + {% for ep in oauth2_proxy_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + +{% if service_discovery_endpoints %} + upstream service_discovery_servers { + {% for ep in service_discovery_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + {% if dashboard_endpoints %} upstream dashboard_servers { {% for ep in dashboard_endpoints %} diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 459ab7df1a002..eed4b2b5d112c 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1762,7 +1762,7 @@ class IngressSpec(ServiceSpec): if not self.keepalive_only and not self.frontend_port: raise SpecValidationError( 'Cannot add ingress: No frontend_port specified') - if not self.monitor_port: + if not self.keepalive_only and not self.monitor_port: raise SpecValidationError( 'Cannot add ingress: No monitor_port specified') if not self.virtual_ip and not self.virtual_ips_list: @@ -1805,6 +1805,7 @@ class MgmtGatewaySpec(ServiceSpec): ssl_protocols: Optional[List[str]] = None, ssl_ciphers: Optional[List[str]] = None, 
enable_health_check_endpoint: bool = False, + virtual_ip: Optional[str] = None, preview_only: bool = False, unmanaged: bool = False, extra_container_args: Optional[GeneralArgList] = None, @@ -1851,6 +1852,7 @@ class MgmtGatewaySpec(ServiceSpec): #: List of supported secure SSL ciphers. Changing this list may reduce system security. self.ssl_ciphers = ssl_ciphers self.enable_health_check_endpoint = enable_health_check_endpoint + self.virtual_ip = virtual_ip def get_port_start(self) -> List[int]: ports = []