From: Redouane Kachach Date: Fri, 17 May 2024 13:19:35 +0000 (+0200) Subject: mgr/cephadm: adding new cephadm service mgmt-gateway X-Git-Tag: testing/wip-vshankar-testing-20240718.183435-debug~74^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=17481c15288c6d115acbae0c2632f5c54ed325a0;p=ceph-ci.git mgr/cephadm: adding new cephadm service mgmt-gateway adding mgmt-gateway, a new cephadm service based on nginx, to act as the front-end and single entry point to the cluster. This gateway offers unified access to all Ceph applications, including the Ceph dashboard and monitoring tools (Prometheus, Grafana, ..), while enhancing security and simplifying access management through nginx. Fixes: https://tracker.ceph.com/issues/66095 Signed-off-by: Redouane Kachach --- diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 8949f1ce6bf..95169358f3a 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -176,6 +176,7 @@ from cephadmlib.daemons import ( NFSGanesha, SMB, SNMPGateway, + MgmtGateway, Tracing, NodeProxy, ) @@ -227,6 +228,7 @@ def get_supported_daemons(): supported_daemons.append(Keepalived.daemon_type) supported_daemons.append(CephadmAgent.daemon_type) supported_daemons.append(SNMPGateway.daemon_type) + supported_daemons.append(MgmtGateway.daemon_type) supported_daemons.extend(Tracing.components) supported_daemons.append(NodeProxy.daemon_type) supported_daemons.append(SMB.daemon_type) @@ -463,6 +465,8 @@ def update_default_image(ctx: CephadmContext) -> None: ctx.image = Keepalived.default_image if type_ == SNMPGateway.daemon_type: ctx.image = SNMPGateway.default_image + if type_ == MgmtGateway.daemon_type: + ctx.image = MgmtGateway.default_image if type_ == CephNvmeof.daemon_type: ctx.image = CephNvmeof.default_image if type_ in Tracing.components: @@ -855,6 +859,10 @@ def create_daemon_dirs( sg = SNMPGateway.init(ctx, fsid, ident.daemon_id) sg.create_daemon_conf() + elif daemon_type == MgmtGateway.daemon_type: + cg = MgmtGateway.init(ctx, fsid, ident.daemon_id) + cg.create_daemon_dirs(data_dir, uid, gid) + elif daemon_type == NodeProxy.daemon_type: node_proxy = NodeProxy.init(ctx, fsid, ident.daemon_id) node_proxy.create_daemon_dirs(data_dir, uid, gid) @@ -3571,6 +3579,9 @@ def list_daemons( elif daemon_type == SNMPGateway.daemon_type: version = SNMPGateway.get_version(ctx, fsid, daemon_id) seen_versions[image_id] = version + elif daemon_type == MgmtGateway.daemon_type: + version = MgmtGateway.get_version(ctx, container_id) + seen_versions[image_id] = version else: logger.warning('version for unknown daemon type %s' % daemon_type) else: diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py index a6cf4389ff6..41add9fd10d 100644 --- a/src/cephadm/cephadmlib/constants.py +++ b/src/cephadm/cephadmlib/constants.py @@ -19,6 +19,7 @@ DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' DEFAULT_SMB_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64' +DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1' DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this # ------------------------------------------------------------------------------ diff --git a/src/cephadm/cephadmlib/daemons/__init__.py b/src/cephadm/cephadmlib/daemons/__init__.py index 1a9d2d568bc..279f6f1a898 100644 --- a/src/cephadm/cephadmlib/daemons/__init__.py +++ 
b/src/cephadm/cephadmlib/daemons/__init__.py @@ -9,6 +9,7 @@ from .smb import SMB from .snmp import SNMPGateway from .tracing import Tracing from .node_proxy import NodeProxy +from .mgmt_gateway import MgmtGateway __all__ = [ 'Ceph', @@ -25,4 +26,5 @@ __all__ = [ 'SNMPGateway', 'Tracing', 'NodeProxy', + 'MgmtGateway', ] diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py new file mode 100644 index 00000000000..f3b28bcccc7 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py @@ -0,0 +1,174 @@ +import logging +import os +from typing import Dict, List, Tuple, Optional +import re + +from ..call_wrappers import call, CallVerbosity +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer +from ..context import CephadmContext +from ..context_getters import fetch_configs +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..deployment_utils import to_deployment_container +from ..constants import DEFAULT_NGINX_IMAGE +from ..data_utils import dict_get, is_fsid +from ..file_utils import populate_files, makedirs, recursive_chown +from ..exceptions import Error + +logger = logging.getLogger() + + +@register_daemon_form +class MgmtGateway(ContainerDaemonForm): + """Defines an MgmtGateway container""" + + daemon_type = 'mgmt-gateway' + required_files = [ + 'nginx.conf', + 'nginx_external_server.conf', + 'nginx_internal_server.conf', + 'nginx_internal.crt', + 'nginx_internal.key', + ] + + default_image = DEFAULT_NGINX_IMAGE + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + fsid: str, + daemon_id: str, + config_json: Dict, + image: str = DEFAULT_NGINX_IMAGE, + ): + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + self.files = dict_get(config_json, 'files', {}) + self.validate() + + @classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: str + ) -> 'MgmtGateway': + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'MgmtGateway': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def validate(self) -> None: + if not is_fsid(self.fsid): + raise Error(f'not an fsid: {self.fsid}') + if not self.daemon_id: + raise Error(f'invalid daemon_id: {self.daemon_id}') + if not self.image: + raise Error(f'invalid image: {self.image}') + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return 65534, 65534 # nobody, nobody + + def get_daemon_args(self) -> List[str]: + return [] + + def default_entrypoint(self) -> str: + return '' + + def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + logger.info('Writing mgmt-gateway 
config...') + config_dir = os.path.join(data_dir, 'etc/') + makedirs(config_dir, uid, gid, 0o755) + recursive_chown(config_dir, uid, gid) + populate_files(config_dir, self.files, uid, gid) + + def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: + mounts: Dict[str, str] = {} + mounts[ + os.path.join(data_dir, 'nginx.conf') + ] = '/etc/nginx/nginx.conf:Z' + return mounts + + @staticmethod + def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]: + """Return the version of the Nginx container""" + version = None + out, err, code = call( + ctx, + [ + ctx.container_engine.path, + 'exec', + container_id, + 'nginx', + '-v', + ], + verbosity=CallVerbosity.QUIET, + ) + if code == 0: + # nginx is using stderr to print the version!! + match = re.search(r'nginx version:\s*nginx\/(.+)', err) + if match: + version = match.group(1) + return version + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update( + { + os.path.join( + data_dir, 'etc/nginx.conf' + ): '/etc/nginx/nginx.conf:Z', + os.path.join( + data_dir, 'etc/nginx_internal_server.conf' + ): '/etc/nginx_internal_server.conf:Z', + os.path.join( + data_dir, 'etc/nginx_external_server.conf' + ): '/etc/nginx_external_server.conf:Z', + os.path.join( + data_dir, 'etc/nginx_internal.crt' + ): '/etc/nginx/ssl/nginx_internal.crt:Z', + os.path.join( + data_dir, 'etc/nginx_internal.key' + ): '/etc/nginx/ssl/nginx_internal.key:Z', + } + ) + + if 'nginx.crt' in self.files: + mounts.update( + { + os.path.join( + data_dir, 'etc/nginx.crt' + ): '/etc/nginx/ssl/nginx.crt:Z', + os.path.join( + data_dir, 'etc/nginx.key' + ): '/etc/nginx/ssl/nginx.key:Z', + } + ) diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py index aa93ebe7305..55e6a9458f0 100644 --- a/src/cephadm/cephadmlib/daemons/monitoring.py +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -260,6 +260,7 @@ class Monitoring(ContainerDaemonForm): retention_size = config.get( 'retention_size', '0' ) # default to disabled + use_url_prefix = config.get('use_url_prefix', False) r += [f'--storage.tsdb.retention.time={retention_time}'] r += [f'--storage.tsdb.retention.size={retention_size}'] scheme = 'http' @@ -271,10 +272,17 @@ class Monitoring(ContainerDaemonForm): # use the first ipv4 (if any) otherwise use the first ipv6 addr = next(iter(ipv4_addrs or ipv6_addrs), None) host = wrap_ipv6(addr) if addr else host - r += [f'--web.external-url={scheme}://{host}:{port}'] + if use_url_prefix: + r += [ + f'--web.external-url={scheme}://{host}:{port}/prometheus' + ] + r += ['--web.route-prefix=/prometheus/'] + else: + r += [f'--web.external-url={scheme}://{host}:{port}'] r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': config = fetch_configs(ctx) + use_url_prefix = config.get('use_url_prefix', False) peers = config.get('peers', list()) # type: ignore for peer in peers: r += ['--cluster.peer={}'.format(peer)] @@ -284,6 +292,8 @@ class Monitoring(ContainerDaemonForm): pass # some alertmanager, by default, look elsewhere for a config r += ['--config.file=/etc/alertmanager/alertmanager.yml'] + if use_url_prefix: + r += ['--web.route-prefix=/alertmanager'] if daemon_type == 'promtail': r += ['--config.expand-env'] if daemon_type == 'prometheus': diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index b86b6ff80b9..492a9a98d34 100644 --- 
a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -1889,6 +1889,7 @@ class CertKeyStore(): 'iscsi_ssl_cert': {}, # service-name -> cert 'ingress_ssl_cert': {}, # service-name -> cert 'agent_endpoint_root_cert': Cert(), # cert + 'mgmt_gw_root_cert': Cert(), # cert 'service_discovery_root_cert': Cert(), # cert 'grafana_cert': {}, # host -> cert 'alertmanager_cert': {}, # host -> cert @@ -1901,6 +1902,7 @@ class CertKeyStore(): self.known_keys = { 'agent_endpoint_key': PrivKey(), # key 'service_discovery_key': PrivKey(), # key + 'mgmt_gw_root_key': PrivKey(), # cert 'grafana_key': {}, # host -> key 'alertmanager_key': {}, # host -> key 'prometheus_key': {}, # host -> key diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index b68d571de68..bd7981edc56 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -68,6 +68,7 @@ from .services.ingress import IngressService from .services.container import CustomContainerService from .services.iscsi import IscsiService from .services.nvmeof import NvmeofService +from .services.mgmt_gateway import MgmtGatewayService from .services.nfs import NFSService from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ @@ -139,6 +140,7 @@ DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' +DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1' DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' DEFAULT_SAMBA_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64' # ------------------------------------------------------------------------------ @@ -277,6 +279,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, default=DEFAULT_SNMP_GATEWAY_IMAGE, desc='SNMP Gateway container image', ), + Option( + 'container_image_nginx', + default=DEFAULT_NGINX_IMAGE, + desc='Nginx container image', + ), Option( 'container_image_elasticsearch', default=DEFAULT_ELASTICSEARCH_IMAGE, @@ -562,6 +569,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.container_image_haproxy = '' self.container_image_keepalived = '' self.container_image_snmp_gateway = '' + self.container_image_nginx = '' self.container_image_elasticsearch = '' self.container_image_jaeger_agent = '' self.container_image_jaeger_collector = '' @@ -708,6 +716,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, RgwService, SMBService, SNMPGatewayService, + MgmtGatewayService, ] # https://github.com/python/mypy/issues/8993 @@ -918,7 +927,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'mon', 'crash', 'ceph-exporter', 'node-proxy', 'prometheus', 'node-exporter', 'grafana', 'alertmanager', 'container', 'agent', 'snmp-gateway', 'loki', 'promtail', - 'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query' + 'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query', 'mgmt-gateway' ] if forcename: if len([d for d in existing if d.daemon_id == forcename]): @@ -1650,6 +1659,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'prometheus': self.container_image_prometheus, 'promtail': self.container_image_promtail, 'snmp-gateway': self.container_image_snmp_gateway, + 
'mgmt-gateway': self.container_image_nginx, # The image can't be resolved here, the necessary information # is only available when a container is deployed (given # via spec). @@ -2926,17 +2936,18 @@ Then run the following: deps.append('ingress') # add dependency on ceph-exporter daemons deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] + deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')] if self.secure_monitoring_stack: if prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'grafana': - deps += get_daemon_names(['prometheus', 'loki']) + deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway']) if self.secure_monitoring_stack and prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') elif daemon_type == 'alertmanager': - deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway']) + deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway']) if self.secure_monitoring_stack and alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': @@ -2947,11 +2958,15 @@ Then run the following: port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT deps.append(build_url(host=dd.hostname, port=port).lstrip('/')) deps = sorted(deps) + elif daemon_type == 'mgmt-gateway': + # url_prefix for monitoring daemons depends on the presence of mgmt-gateway + # while dashboard urls depend on the mgr daemons + deps += get_daemon_names(['mgr', 'grafana', 'prometheus', 'alertmanager']) else: - # TODO(redo): some error message! 
+ # this daemon type doesn't need deps mgmt pass - if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']: + if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']: deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}') return sorted(deps) @@ -3337,6 +3352,7 @@ Then run the following: 'crash': PlacementSpec(host_pattern='*'), 'container': PlacementSpec(count=1), 'snmp-gateway': PlacementSpec(count=1), + 'mgmt-gateway': PlacementSpec(count=1), 'elasticsearch': PlacementSpec(count=1), 'jaeger-agent': PlacementSpec(host_pattern='*'), 'jaeger-collector': PlacementSpec(count=1), @@ -3475,6 +3491,10 @@ Then run the following: def apply_smb(self, spec: ServiceSpec) -> str: return self._apply(spec) + @handle_orch_error + def apply_mgmt_gateway(self, spec: ServiceSpec) -> str: + return self._apply(spec) + @handle_orch_error def set_unmanaged(self, service_name: str, value: bool) -> str: return self.spec_store.set_unmanaged(service_name, value) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 561a3e085b8..59e06bbd024 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -41,7 +41,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof'] +REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof', 'mgmt-gateway'] WHICH = ssh.RemoteExecutable('which') CEPHADM_EXE = ssh.RemoteExecutable('/usr/bin/cephadm') @@ -1093,10 +1093,12 @@ class CephadmServe: self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}') self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...') action = 'reconfig' - # we need only redeploy if secure_monitoring_stack value has changed: + # we need only redeploy if secure_monitoring_stack or mgmt-gateway value has changed: + # TODO(redo): check if we should just go always with redeploy (it's fast enough) if dd.daemon_type in ['prometheus', 'node-exporter', 'alertmanager']: diff = list(set(last_deps).symmetric_difference(set(deps))) - if any('secure_monitoring_stack' in e for e in diff): + REDEPLOY_TRIGGERS = ['secure_monitoring_stack', 'mgmt-gateway'] + if any(svc in e for e in diff for svc in REDEPLOY_TRIGGERS): action = 'redeploy' elif dd.daemon_type == 'jaeger-agent': # changes to jaeger-agent deps affect the way the unit.run for diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 4b22400b49e..ec9df98413a 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -5,6 +5,8 @@ import re import socket import time from abc import ABCMeta, abstractmethod +import ipaddress +from urllib.parse import urlparse from typing import TYPE_CHECKING, List, Callable, TypeVar, \ Optional, Dict, Any, Tuple, NewType, cast @@ -73,6 +75,61 @@ def simplified_keyring(entity: str, contents: str) -> str: return keyring +def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[str]]: + dashboard_endpoints: List[str] = [] + port = None + protocol = None + mgr_map = svc.mgr.get('mgr_map') + url = mgr_map.get('services', {}).get('dashboard', None) + if url: + p_result = urlparse(url.rstrip('/')) + protocol = p_result.scheme + port = p_result.port + # assume that they are all dashboards on the same port as the active mgr. 
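# For illustration, with hypothetical hostnames: the URL parsing above and the
# per-mgr loop that follows reduce to roughly the sketch below (the real helper
# reads mgr_map through the MgrModule API and FQDNs from the cephadm inventory).
from urllib.parse import urlparse

url = 'https://ceph-node-0.example.com:8443/'      # hypothetical mgr_map['services']['dashboard']
p_result = urlparse(url.rstrip('/'))
protocol, port = p_result.scheme, p_result.port    # 'https', 8443
mgr_fqdns = ['ceph-node-0.example.com', 'ceph-node-1.example.com']
dashboard_endpoints = [f'{fqdn}:{port}' for fqdn in mgr_fqdns if port]
# ['ceph-node-0.example.com:8443', 'ceph-node-1.example.com:8443']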
+ for dd in svc.mgr.cache.get_daemons_by_service('mgr'): + if not port: + continue + assert dd.hostname is not None + addr = svc._inventory_get_fqdn(dd.hostname) + dashboard_endpoints.append(f'{addr}:{port}') + + return dashboard_endpoints, protocol + + +def get_dashboard_urls(svc: 'CephadmService') -> List[str]: + # dashboard(s) + dashboard_urls: List[str] = [] + mgr_map = svc.mgr.get('mgr_map') + port = None + proto = None # http: or https: + url = mgr_map.get('services', {}).get('dashboard', None) + if url: + p_result = urlparse(url.rstrip('/')) + hostname = socket.getfqdn(p_result.hostname) + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + pass + else: + if ip.version == 6: + hostname = f'[{hostname}]' + dashboard_urls.append(f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}') + proto = p_result.scheme + port = p_result.port + + # assume that they are all dashboards on the same port as the active mgr. + for dd in svc.mgr.cache.get_daemons_by_service('mgr'): + if not port: + continue + if dd.daemon_id == svc.mgr.get_mgr_id(): + continue + assert dd.hostname is not None + addr = svc._inventory_get_fqdn(dd.hostname) + dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) + + return dashboard_urls + + class CephadmDaemonDeploySpec: # typing.NamedTuple + Generic is broken in py36 def __init__(self, host: str, daemon_id: str, @@ -336,22 +393,21 @@ class CephadmService(metaclass=ABCMeta): addr = self.mgr.inventory.get_addr(hostname) return socket.getfqdn(addr) - def _set_service_url_on_dashboard(self, - service_name: str, - get_mon_cmd: str, - set_mon_cmd: str, - service_url: str) -> None: - """A helper to get and set service_url via Dashboard's MON command. - - If result of get_mon_cmd differs from service_url, set_mon_cmd will + def _set_value_on_dashboard(self, + service_name: str, + get_mon_cmd: str, + set_mon_cmd: str, + new_value: str) -> None: + """A helper to get and set values via Dashboard's MON command. + If result of get_mon_cmd differs from the new_value, set_mon_cmd will be sent to set the service_url. 
""" def get_set_cmd_dicts(out: str) -> List[dict]: cmd_dict = { 'prefix': set_mon_cmd, - 'value': service_url + 'value': new_value } - return [cmd_dict] if service_url != out else [] + return [cmd_dict] if new_value != out else [] self._check_and_set_dashboard( service_name=service_name, diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py new file mode 100644 index 00000000000..7ba59faca28 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/mgmt_gateway.py @@ -0,0 +1,146 @@ +import logging +from typing import List, Any, Tuple, Dict, cast + +from orchestrator import DaemonDescription +from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints +from cephadm.ssl_cert_utils import SSLCerts + +logger = logging.getLogger(__name__) + + +class MgmtGatewayService(CephadmService): + TYPE = 'mgmt-gateway' + SVC_TEMPLATE_PATH = 'services/mgmt-gateway/nginx.conf.j2' + EXTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/external_server.conf.j2' + INTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/internal_server.conf.j2' + INTERNAL_SERVICE_PORT = 29443 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def get_service_endpoints(self, service_name: str) -> List[str]: + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_service(service_name): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else None + srv_entries.append(f'{addr}:{port}') + return srv_entries + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + if daemon_descrs: + return daemon_descrs[0] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + # we adjust the standby behaviour so rev-proxy can pick correctly the active instance + self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '503') + self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'error') + + def get_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str, str, str]: + self.ssl_certs = SSLCerts() + old_cert = self.mgr.cert_key_store.get_cert('mgmt_gw_root_cert') + old_key = self.mgr.cert_key_store.get_key('mgmt_gw_root_key') + if old_cert and old_key: + self.ssl_certs.load_root_credentials(old_cert, old_key) + else: + self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip()) + self.mgr.cert_key_store.save_cert('mgmt_gw_root_cert', self.ssl_certs.get_root_cert()) + self.mgr.cert_key_store.save_key('mgmt_gw_root_key', self.ssl_certs.get_root_key()) + + node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + internal_cert, internal_pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip) + cert = svc_spec.ssl_certificate + pkey = svc_spec.ssl_certificate_key + if not (cert and pkey): + # In case the user has not provided certificates then we generate self-signed ones + cert, pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip) + + return internal_cert, internal_pkey, cert, pkey + + def get_mgmt_gateway_deps(self) -> List[str]: + # 
url_prefix for the following services depends on the presence of mgmt-gateway + deps: List[str] = [] + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('prometheus')] + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')] + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')] + # secure_monitoring_stack affects the protocol used by monitoring services + deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider mgr a dep even if the dashboard is disabled + # in order to be consistent with _calc_daemon_deps(). + deps.append(dd.name()) + + return deps + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + svc_spec = cast(MgmtGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec) + dashboard_endpoints, dashboard_scheme = get_dashboard_endpoints(self) + scheme = 'https' if self.mgr.secure_monitoring_stack else 'http' + + prometheus_endpoints = self.get_service_endpoints('prometheus') + alertmanager_endpoints = self.get_service_endpoints('alertmanager') + grafana_endpoints = self.get_service_endpoints('grafana') + try: + grafana_spec = cast(GrafanaSpec, self.mgr.spec_store['grafana'].spec) + grafana_protocol = grafana_spec.protocol + except Exception: + grafana_protocol = 'https' # defualt to https just for UT + + main_context = { + 'dashboard_endpoints': dashboard_endpoints, + 'prometheus_endpoints': prometheus_endpoints, + 'alertmanager_endpoints': alertmanager_endpoints, + 'grafana_endpoints': grafana_endpoints + } + external_server_context = { + 'spec': svc_spec, + 'dashboard_scheme': dashboard_scheme, + 'grafana_scheme': grafana_protocol, + 'prometheus_scheme': scheme, + 'alertmanager_scheme': scheme, + 'dashboard_endpoints': dashboard_endpoints, + 'prometheus_endpoints': prometheus_endpoints, + 'alertmanager_endpoints': alertmanager_endpoints, + 'grafana_endpoints': grafana_endpoints + } + internal_server_context = { + 'spec': svc_spec, + 'internal_port': self.INTERNAL_SERVICE_PORT, + 'grafana_scheme': grafana_protocol, + 'prometheus_scheme': scheme, + 'alertmanager_scheme': scheme, + 'prometheus_endpoints': prometheus_endpoints, + 'alertmanager_endpoints': alertmanager_endpoints, + 'grafana_endpoints': grafana_endpoints + } + + internal_cert, internal_pkey, cert, pkey = self.get_certificates(svc_spec, daemon_spec) + daemon_config = { + "files": { + "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context), + "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, external_server_context), + "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, internal_server_context), + "nginx_internal.crt": internal_cert, + "nginx_internal.key": internal_pkey + } + } + if not svc_spec.disable_https: + daemon_config["files"]["nginx.crt"] = cert + daemon_config["files"]["nginx.key"] = pkey + + return daemon_config, sorted(self.get_mgmt_gateway_deps()) + + def pre_remove(self, daemon: DaemonDescription) -> None: + """ + Called before mgmt-gateway daemon is removed. 
+ """ + # reset the standby dashboard redirection behaviour + self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '500') + self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'redirect') diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index c12c637c39d..71a9b60d31f 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -1,17 +1,16 @@ import errno -import ipaddress import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast -from urllib.parse import urlparse from mgr_module import HandleCommandResult from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ - SNMPGatewaySpec, PrometheusSpec -from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec + SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls +from cephadm.services.mgmt_gateway import MgmtGatewayService from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash from ceph.deployment.utils import wrap_ipv6 @@ -35,6 +34,9 @@ class GrafanaService(CephadmService): deps.append(f'{hash(prometheus_user + prometheus_password)}') deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + # add a dependency since url_prefix depends on the existence of mgmt-gateway + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] + prom_services = [] # type: List[str] for dd in self.mgr.cache.get_daemons_by_service('prometheus'): assert dd.hostname is not None @@ -78,13 +80,15 @@ class GrafanaService(CephadmService): daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 grafana_ini = self.mgr.template.render( 'services/grafana/grafana.ini.j2', { 'anonymous_access': spec.anonymous_access, 'initial_admin_password': spec.initial_admin_password, 'http_port': grafana_port, 'protocol': spec.protocol, - 'http_addr': grafana_ip + 'http_addr': grafana_ip, + 'use_url_prefix': mgmt_gw_enabled }) if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password: @@ -187,13 +191,36 @@ class GrafanaService(CephadmService): addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec) - service_url = build_url(scheme=spec.protocol, host=addr, port=port) - self._set_service_url_on_dashboard( - 'Grafana', - 'dashboard get-grafana-api-url', - 'dashboard set-grafana-api-url', - service_url - ) + + mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') + if mgmt_gw_daemons: + dd = mgmt_gw_daemons[0] + assert dd.hostname is not None + mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec) + mgmt_gw_port = dd.ports[0] if dd.ports else None + mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) + protocol = 'http' if mgmt_gw_spec.disable_https else 'https' + mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port) + self._set_value_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-url', + 'dashboard set-grafana-api-url', + 
f'{mgmt_gw_external_endpoint}/grafana' + ) + self._set_value_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-ssl-verify', + 'dashboard set-grafana-api-ssl-verify', + 'false' + ) + else: + service_url = build_url(scheme=spec.protocol, host=addr, port=port) + self._set_value_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-url', + 'dashboard set-grafana-api-url', + service_url + ) def pre_remove(self, daemon: DaemonDescription) -> None: """ @@ -240,44 +267,15 @@ class AlertmanagerService(CephadmService): user_data['default_webhook_urls'], list): default_webhook_urls.extend(user_data['default_webhook_urls']) - # dashboard(s) - dashboard_urls: List[str] = [] - snmp_gateway_urls: List[str] = [] - mgr_map = self.mgr.get('mgr_map') - port = None - proto = None # http: or https: - url = mgr_map.get('services', {}).get('dashboard', None) - if url: - p_result = urlparse(url.rstrip('/')) - hostname = socket.getfqdn(p_result.hostname) - - try: - ip = ipaddress.ip_address(hostname) - except ValueError: - pass - else: - if ip.version == 6: - hostname = f'[{hostname}]' - - dashboard_urls.append( - f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}') - proto = p_result.scheme - port = p_result.port - + # add a dependency since url_prefix depends on the existence of mgmt-gateway + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] # scan all mgrs to generate deps and to get standbys too. - # assume that they are all on the same port as the active mgr. for dd in self.mgr.cache.get_daemons_by_service('mgr'): # we consider mgr a dep even if the dashboard is disabled # in order to be consistent with _calc_daemon_deps(). deps.append(dd.name()) - if not port: - continue - if dd.daemon_id == self.mgr.get_mgr_id(): - continue - assert dd.hostname is not None - addr = self._inventory_get_fqdn(dd.hostname) - dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) + snmp_gateway_urls: List[str] = [] for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): assert dd.hostname is not None assert dd.ports @@ -289,7 +287,7 @@ class AlertmanagerService(CephadmService): context = { 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, - 'dashboard_urls': dashboard_urls, + 'dashboard_urls': get_dashboard_urls(self), 'default_webhook_urls': default_webhook_urls, 'snmp_gateway_urls': snmp_gateway_urls, 'secure': secure, @@ -304,8 +302,8 @@ class AlertmanagerService(CephadmService): addr = self._inventory_get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') - if self.mgr.secure_monitoring_stack: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() if alertmanager_user and alertmanager_password: @@ -332,14 +330,16 @@ class AlertmanagerService(CephadmService): 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() }, 'peers': peers, - 'web_config': '/etc/alertmanager/web.yml' + 'web_config': '/etc/alertmanager/web.yml', + 'use_url_prefix': mgmt_gw_enabled }, sorted(deps) else: return { "files": { "alertmanager.yml": yml }, - "peers": peers + "peers": peers, + 'use_url_prefix': mgmt_gw_enabled }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: @@ -355,13 +355,33 @@ class AlertmanagerService(CephadmService): addr = dd.ip if dd.ip else 
self._inventory_get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' - service_url = build_url(scheme=protocol, host=addr, port=port) - self._set_service_url_on_dashboard( - 'AlertManager', - 'dashboard get-alertmanager-api-host', - 'dashboard set-alertmanager-api-host', - service_url - ) + + mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') + if mgmt_gw_daemons: + dd = mgmt_gw_daemons[0] + assert dd.hostname is not None + mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) + mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + self._set_value_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard set-alertmanager-api-host', + f'{mgmt_gw_internal_endpoint}/internal/alertmanager' + ) + self._set_value_on_dashboard( + 'Alertmanager', + 'dashboard get-alertmanager-api-ssl-verify', + 'dashboard set-alertmanager-api-ssl-verify', + 'false' + ) + else: + service_url = build_url(scheme=protocol, host=addr, port=port) + self._set_value_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard set-alertmanager-api-host', + service_url + ) def pre_remove(self, daemon: DaemonDescription) -> None: """ @@ -481,6 +501,7 @@ class PrometheusService(CephadmService): 'prometheus_web_password': password_hash(prometheus_password), } + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 if self.mgr.secure_monitoring_stack: # NOTE: this prometheus root cert is managed by the prometheus module # we are using it in a read only fashion in the cephadm module @@ -510,7 +531,8 @@ class PrometheusService(CephadmService): 'retention_time': retention_time, 'retention_size': retention_size, 'ip_to_bind_to': ip_to_bind_to, - 'web_config': '/etc/prometheus/web.yml' + 'web_config': '/etc/prometheus/web.yml', + 'use_url_prefix': mgmt_gw_enabled } else: r = { @@ -519,7 +541,8 @@ class PrometheusService(CephadmService): }, 'retention_time': retention_time, 'retention_size': retention_size, - 'ip_to_bind_to': ip_to_bind_to + 'ip_to_bind_to': ip_to_bind_to, + 'use_url_prefix': mgmt_gw_enabled } # include alerts, if present in the container @@ -563,6 +586,10 @@ class PrometheusService(CephadmService): if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + + # add a dependency since url_prefix depends on the existence of mgmt-gateway + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] + # add dependency on ceph-exporter daemons deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')] deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)] @@ -583,13 +610,33 @@ class PrometheusService(CephadmService): addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' - service_url = build_url(scheme=protocol, host=addr, port=port) - self._set_service_url_on_dashboard( - 'Prometheus', - 'dashboard get-prometheus-api-host', - 'dashboard set-prometheus-api-host', - service_url - ) + + mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') + if mgmt_gw_daemons: + dd = mgmt_gw_daemons[0] + 
assert dd.hostname is not None + mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) + mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + self._set_value_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-host', + 'dashboard set-prometheus-api-host', + f'{mgmt_gw_internal_endpoint}/internal/prometheus' + ) + self._set_value_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-ssl-verify', + 'dashboard set-prometheus-api-ssl-verify', + 'false' + ) + else: + service_url = build_url(scheme=protocol, host=addr, port=port) + self._set_value_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-host', + 'dashboard set-prometheus-api-host', + service_url + ) def pre_remove(self, daemon: DaemonDescription) -> None: """ diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 index e6c7bce1524..4d3d11e2083 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -14,6 +14,10 @@ cert_key = /etc/grafana/certs/cert_key http_port = {{ http_port }} http_addr = {{ http_addr }} +{% if use_url_prefix %} + root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/ + serve_from_sub_path = true +{% endif %} [snapshots] external_enabled = false [security] diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 new file mode 100644 index 00000000000..2220e8e4759 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 @@ -0,0 +1,75 @@ + +server { +{% if spec.disable_https %} + listen {{ spec.port or 80 }}; +{% else %} + listen {{ spec.port or 443 }} ssl; + listen [::]:{{ spec.port or 443 }} ssl; + ssl_certificate /etc/nginx/ssl/nginx.crt; + ssl_certificate_key /etc/nginx/ssl/nginx.key; + {% if spec.ssl_protocols %} + ssl_protocols {{ spec.ssl_protocols | join(' ') }}; + {% else %} + ssl_protocols TLSv1.3; + {% endif %} + {% if spec.ssl_ciphers %} + ssl_ciphers {{ spec.ssl_ciphers | join(':') }}; + {% else %} + # from: https://ssl-config.mozilla.org/#server=nginx + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305; + {% endif %} + + # Only return Nginx in server header, no extra info will be provided + server_tokens {{ spec.server_tokens or 'off'}}; + + # Perfect Forward Secrecy(PFS) is frequently compromised without this + ssl_prefer_server_ciphers {{ spec.ssl_prefer_server_ciphers or 'on'}}; + + # Enable SSL session caching for improved performance + ssl_session_tickets {{ spec.ssl_session_tickets or 'off'}}; + ssl_session_timeout {{ spec.ssl_session_timeout or '1d'}}; + ssl_session_cache {{ spec.ssl_session_cache or 'shared:SSL:10m'}}; + + # OCSP stapling + ssl_stapling {{ spec.ssl_stapling or 'on'}}; + ssl_stapling_verify {{ spec.ssl_stapling_verify or 'on'}}; + resolver_timeout 5s; + + # Security headers + ## X-Content-Type-Options: avoid MIME type sniffing + add_header X-Content-Type-Options nosniff; + ## Strict Transport Security (HSTS): Yes + add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload"; + ## Enables the Cross-site 
scripting (XSS) filter in browsers. + add_header X-XSS-Protection "1; mode=block"; + ## Content-Security-Policy (CSP): FIXME + # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';"; + +{% endif %} + +{% if dashboard_endpoints %} + location / { + proxy_pass {{ dashboard_scheme }}://dashboard_servers; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + } +{% endif %} + +{% if grafana_endpoints %} + location /grafana { + rewrite ^/grafana/(.*) /$1 break; + proxy_pass {{ grafana_scheme }}://grafana_servers; + } +{% endif %} + +{% if prometheus_endpoints %} + location /prometheus { + proxy_pass {{ prometheus_scheme }}://prometheus_servers; + } +{% endif %} + +{% if alertmanager_endpoints %} + location /alertmanager { + proxy_pass {{ alertmanager_scheme }}://alertmanager_servers; + } +{% endif %} +} diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 new file mode 100644 index 00000000000..6848c04ebe8 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 @@ -0,0 +1,31 @@ + +server { + listen {{ internal_port }} ssl; + listen [::]:{{ internal_port }} ssl; + ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5; + ssl_prefer_server_ciphers on; + +{% if grafana_endpoints %} + location /internal/grafana { + rewrite ^/internal/grafana/(.*) /$1 break; + proxy_pass {{ grafana_scheme }}://grafana_servers; + } +{% endif %} + +{% if prometheus_endpoints %} + location /internal/prometheus { + rewrite ^/internal/prometheus/(.*) /prometheus/$1 break; + proxy_pass {{ prometheus_scheme }}://prometheus_servers; + } +{% endif %} + +{% if alertmanager_endpoints %} + location /internal/alertmanager { + rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break; + proxy_pass {{ alertmanager_scheme }}://alertmanager_servers; + } +{% endif %} +} diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 new file mode 100644 index 00000000000..9ce6eb9867d --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 @@ -0,0 +1,44 @@ + +# {{ cephadm_managed }} +worker_rlimit_nofile 8192; + +events { + worker_connections 4096; +} + +http { +{% if dashboard_endpoints %} + upstream dashboard_servers { + {% for ep in dashboard_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + +{% if grafana_endpoints %} + upstream grafana_servers { + {% for ep in grafana_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + +{% if prometheus_endpoints %} + upstream prometheus_servers { + {% for ep in prometheus_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + +{% if alertmanager_endpoints %} + upstream alertmanager_servers { + {% for ep in alertmanager_endpoints %} + server {{ ep }}; + {% endfor %} + } +{% endif %} + + include /etc/nginx_external_server.conf; + include /etc/nginx_internal_server.conf; +} diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index d9b1f5f8e78..996c1d5248c 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ 
b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1725,6 +1725,7 @@ class TestCephadm(object): 'ingress_ssl_cert': False, 'agent_endpoint_root_cert': False, 'service_discovery_root_cert': False, + 'mgmt_gw_root_cert': False, 'grafana_cert': False, 'alertmanager_cert': False, 'prometheus_cert': False, @@ -1774,6 +1775,7 @@ class TestCephadm(object): 'service_discovery_key': False, 'grafana_key': False, 'alertmanager_key': False, + 'mgmt_gw_root_key': False, 'prometheus_key': False, 'node_exporter_key': False, 'iscsi_ssl_key': False, diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 77d127f81e5..63b4068f15e 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -35,6 +35,7 @@ from ceph.deployment.service_spec import ( SNMPGatewaySpec, ServiceSpec, TracingSpec, + MgmtGatewaySpec, ) from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect @@ -45,9 +46,9 @@ from orchestrator._interface import DaemonDescription from typing import Dict, List -grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" +ceph_generated_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" -grafana_key = """-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" +ceph_generated_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" class FakeInventory: @@ -91,17 +92,17 @@ class FakeMgr: class 
TestCephadmService: - def test_set_service_url_on_dashboard(self): + def test_set_value_on_dashboard(self): # pylint: disable=protected-access mgr = FakeMgr() service_url = 'http://svc:1000' service = GrafanaService(mgr) - service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) assert mgr.config == service_url # set-cmd should not be called if value doesn't change mgr.check_mon_command.reset_mock() - service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'}) def _get_services(self, mgr): @@ -591,6 +592,7 @@ class TestMonitoring: "alertmanager.yml": y, }, "peers": [], + "use_url_prefix": False, } }), use_current_daemon_image=False, @@ -687,6 +689,7 @@ class TestMonitoring: }, 'peers': [], 'web_config': '/etc/alertmanager/web.yml', + "use_url_prefix": False, } }), use_current_daemon_image=False, @@ -828,6 +831,7 @@ class TestMonitoring: 'retention_time': '15d', 'retention_size': '0', 'ip_to_bind_to': '1.2.3.1', + "use_url_prefix": False }, }), use_current_daemon_image=False, @@ -1011,6 +1015,7 @@ class TestMonitoring: 'retention_size': '0', 'ip_to_bind_to': '', 'web_config': '/etc/prometheus/web.yml', + "use_url_prefix": False }, }), use_current_daemon_image=False, @@ -1158,8 +1163,8 @@ class TestMonitoring: _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) with with_host(cephadm_module, "test"): - cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert, host='test') - cephadm_module.cert_key_store.save_key('grafana_key', grafana_key, host='test') + cephadm_module.cert_key_store.save_cert('grafana_cert', ceph_generated_cert, host='test') + cephadm_module.cert_key_store.save_key('grafana_key', ceph_generated_key, host='test') with with_service( cephadm_module, PrometheusSpec("prometheus") ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( @@ -1214,9 +1219,9 @@ class TestMonitoring: isDefault: false editable: false""").lstrip(), 'certs/cert_file': dedent(f""" - # generated by cephadm\n{grafana_cert}""").lstrip(), + # generated by cephadm\n{ceph_generated_cert}""").lstrip(), 'certs/cert_key': dedent(f""" - # generated by cephadm\n{grafana_key}""").lstrip(), + # generated by cephadm\n{ceph_generated_key}""").lstrip(), 'provisioning/dashboards/default.yml': dedent(""" # This file is generated by cephadm. 
apiVersion: 1 @@ -1974,7 +1979,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test'): cephadm_module.cache.update_host_networks('test', { '1.2.3.0/24': { @@ -2103,7 +2107,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test', addr='1.2.3.7'): cephadm_module.cache.update_host_networks('test', { '1.2.3.0/24': { @@ -2231,7 +2234,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test', addr='1.2.3.1'): with with_host(cephadm_module, 'test2', addr='1.2.3.2'): cephadm_module.cache.update_host_networks('test', { @@ -3193,3 +3195,178 @@ class TestSMB: stdin=json.dumps(expected), use_current_daemon_image=False, ) + + +class TestMgmtGateway: + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_service_endpoints") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch('cephadm.ssl_cert_utils.SSLCerts.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key)) + @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https")) + def test_mgmt_gateway_config(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): + + def get_services_endpoints(name): + if name == 'prometheus': + return ["192.168.100.100:9095", "192.168.100.101:9095"] + elif name == 'grafana': + return ["ceph-node-2:3000", "ceph-node-2:3000"] + elif name == 'alertmanager': + return ["192.168.100.100:9093", "192.168.100.102:9093"] + return [] + + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + get_service_endpoints_mock.side_effect = get_services_endpoints + + server_port = 5555 + spec = MgmtGatewaySpec(port=server_port, + ssl_certificate=ceph_generated_cert, + ssl_certificate_key=ceph_generated_key) + + expected = { + "fsid": "fsid", + "name": "mgmt-gateway.ceph-node", + "image": "", + "deploy_arguments": [], + "params": {"tcp_ports": [server_port]}, + "meta": { + "service_name": "mgmt-gateway", + "ports": [server_port], + "ip": None, + "deployed_by": [], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "extra_entrypoint_args": None + }, + "config_blobs": { + "files": { + "nginx.conf": dedent(""" + # This file is generated by cephadm. 
+                        worker_rlimit_nofile 8192;
+
+                        events {
+                            worker_connections 4096;
+                        }
+
+                        http {
+                            upstream dashboard_servers {
+                                server ceph-node-2:8443;
+                                server ceph-node-2:8443;
+                            }
+
+                            upstream grafana_servers {
+                                server ceph-node-2:3000;
+                                server ceph-node-2:3000;
+                            }
+
+                            upstream prometheus_servers {
+                                server 192.168.100.100:9095;
+                                server 192.168.100.101:9095;
+                            }
+
+                            upstream alertmanager_servers {
+                                server 192.168.100.100:9093;
+                                server 192.168.100.102:9093;
+                            }
+
+                            include /etc/nginx_external_server.conf;
+                            include /etc/nginx_internal_server.conf;
+                        }"""),
+                    "nginx_external_server.conf": dedent("""
+                        server {
+                            listen 5555 ssl;
+                            listen [::]:5555 ssl;
+                            ssl_certificate /etc/nginx/ssl/nginx.crt;
+                            ssl_certificate_key /etc/nginx/ssl/nginx.key;
+                            ssl_protocols TLSv1.3;
+                            # from: https://ssl-config.mozilla.org/#server=nginx
+                            ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305;
+
+                            # Only return Nginx in server header, no extra info will be provided
+                            server_tokens off;
+
+                            # Perfect Forward Secrecy(PFS) is frequently compromised without this
+                            ssl_prefer_server_ciphers on;
+
+                            # Enable SSL session caching for improved performance
+                            ssl_session_tickets off;
+                            ssl_session_timeout 1d;
+                            ssl_session_cache shared:SSL:10m;
+
+                            # OCSP stapling
+                            ssl_stapling on;
+                            ssl_stapling_verify on;
+                            resolver_timeout 5s;
+
+                            # Security headers
+                            ## X-Content-Type-Options: avoid MIME type sniffing
+                            add_header X-Content-Type-Options nosniff;
+                            ## Strict Transport Security (HSTS): Yes
+                            add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload";
+                            ## Enables the Cross-site scripting (XSS) filter in browsers.
+                            add_header X-XSS-Protection "1; mode=block";
+                            ## Content-Security-Policy (CSP): FIXME
+                            # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';";
+
+
+                            location / {
+                                proxy_pass https://dashboard_servers;
+                                proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                            }
+
+                            location /grafana {
+                                rewrite ^/grafana/(.*) /$1 break;
+                                proxy_pass https://grafana_servers;
+                            }
+
+                            location /prometheus {
+                                proxy_pass http://prometheus_servers;
+                            }
+
+                            location /alertmanager {
+                                proxy_pass http://alertmanager_servers;
+                            }
+                        }"""),
+                    "nginx_internal_server.conf": dedent("""
+                        server {
+                            listen 29443 ssl;
+                            listen [::]:29443 ssl;
+                            ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                            ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                            ssl_protocols TLSv1.2 TLSv1.3;
+                            ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
+                            ssl_prefer_server_ciphers on;
+
+                            location /internal/grafana {
+                                rewrite ^/internal/grafana/(.*) /$1 break;
+                                proxy_pass https://grafana_servers;
+                            }
+
+                            location /internal/prometheus {
+                                rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
+                                proxy_pass http://prometheus_servers;
+                            }
+
+                            location /internal/alertmanager {
+                                rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
+                                proxy_pass http://alertmanager_servers;
+                            }
+                        }"""),
+                    "nginx_internal.crt": f"{ceph_generated_cert}",
+                    "nginx_internal.key": f"{ceph_generated_key}",
+                    "nginx.crt": f"{ceph_generated_cert}",
+                    "nginx.key": f"{ceph_generated_key}",
+                }
+            }
+        }
+
+        with with_host(cephadm_module, 'ceph-node'):
+            with with_service(cephadm_module, spec):
+                _run_cephadm.assert_called_with(
+                    'ceph-node',
+                    'mgmt-gateway.ceph-node',
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps(expected),
+                    use_current_daemon_image=False,
+                )
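
The expected nginx.conf above is essentially the mocked endpoint lists rendered into one upstream block per backing service, with the external and internal server configs pulled in via include. A minimal illustrative sketch of that mapping (render_upstream is a hypothetical helper written for this note, not the actual MgmtGatewayService template code):

# Hypothetical sketch: render a list of "host:port" endpoints into an nginx
# upstream block shaped like the ones in the expected nginx.conf above.
from typing import List


def render_upstream(name: str, endpoints: List[str]) -> str:
    # one "server host:port;" line per backend daemon
    servers = '\n'.join(f'    server {e};' for e in endpoints)
    return f'upstream {name} {{\n{servers}\n}}'


if __name__ == '__main__':
    print(render_upstream('prometheus_servers',
                          ['192.168.100.100:9095', '192.168.100.101:9095']))

Modulo indentation, this prints the same prometheus_servers block that the test expects, which is why the mocked get_service_endpoints values show up verbatim in the config blob.
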
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index 8a04e31170a..0a320e34556 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -42,6 +42,7 @@ from ceph.deployment.service_spec import (
     RGWSpec,
     SMBSpec,
     SNMPGatewaySpec,
+    MgmtGatewaySpec,
     ServiceSpec,
     TunedProfileSpec,
 )
@@ -590,6 +591,7 @@ class Orchestrator(object):
             'snmp-gateway': self.apply_snmp_gateway,
             'host': self.add_host,
             'smb': self.apply_smb,
+            'mgmt-gateway': self.apply_mgmt_gateway,
         }

         def merge(l: OrchResult[List[str]], r: OrchResult[str]) -> OrchResult[List[str]]:  # noqa: E741
@@ -831,6 +833,10 @@ class Orchestrator(object):
         """Update an existing snmp gateway service"""
         raise NotImplementedError()

+    def apply_mgmt_gateway(self, spec: MgmtGatewaySpec) -> OrchResult[str]:
+        """Update an existing cluster gateway service"""
+        raise NotImplementedError()
+
     def apply_smb(self, spec: SMBSpec) -> OrchResult[str]:
         """Update a smb gateway service"""
         raise NotImplementedError()
@@ -914,6 +920,7 @@ def daemon_type_to_service(dtype: str) -> str:
         'keepalived': 'ingress',
         'iscsi': 'iscsi',
         'nvmeof': 'nvmeof',
+        'mgmt-gateway': 'mgmt-gateway',
         'rbd-mirror': 'rbd-mirror',
         'cephfs-mirror': 'cephfs-mirror',
         'nfs': 'nfs',
@@ -949,6 +956,7 @@ def service_to_daemon_types(stype: str) -> List[str]:
         'ingress': ['haproxy', 'keepalived'],
         'iscsi': ['iscsi'],
         'nvmeof': ['nvmeof'],
+        'mgmt-gateway': ['mgmt-gateway'],
         'rbd-mirror': ['rbd-mirror'],
         'cephfs-mirror': ['cephfs-mirror'],
         'nfs': ['nfs'],
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index 5b5d7154da1..3b943d41ca9 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -46,6 +46,7 @@ from ._interface import (
     RGWSpec,
     SMBSpec,
     SNMPGatewaySpec,
+    MgmtGatewaySpec,
     ServiceDescription,
     TunedProfileSpec,
     _cli_read_command,
@@ -1777,6 +1778,32 @@ Usage:

         return self._apply_misc([spec], dry_run, format, no_overwrite)

+    @_cli_write_command('orch apply mgmt-gateway')
+    def _apply_mgmt_gateway(self,
+                            port: Optional[int] = None,
+                            disable_https: Optional[bool] = False,
+                            placement: Optional[str] = None,
+                            unmanaged: bool = False,
+                            dry_run: bool = False,
+                            format: Format = Format.plain,
+                            no_overwrite: bool = False,
+                            inbuf: Optional[str] = None) -> HandleCommandResult:
+        """Add a cluster gateway service (cephadm only)"""
+        if inbuf:
+            raise OrchestratorValidationError('unrecognized command -i; -h or --help for usage')
+
+        spec = MgmtGatewaySpec(
+            placement=PlacementSpec.from_string(placement),
+            unmanaged=unmanaged,
+            port=port,
+            disable_https=disable_https,
+            preview_only=dry_run
+        )
+
+        spec.validate()  # force any validation exceptions to be caught correctly
+
+        return self._apply_misc([spec], dry_run, format, no_overwrite)
+
     @_cli_write_command('orch apply nvmeof')
     def _apply_nvmeof(self,
                       pool: str,
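
The `orch apply mgmt-gateway` handler above only builds a MgmtGatewaySpec from the CLI flags, forces validation, and hands the result to _apply_misc. The same spec can also be built programmatically and dumped to YAML for `ceph orch apply -i <file>`; a hedged sketch, assuming the import paths shown in this diff and using a placeholder host name and port:

# Illustrative only: build the same spec the CLI handler constructs.
# yaml.dump works because this patch registers MgmtGatewaySpec with
# ServiceSpec.yaml_representer at module import time.
import yaml

from ceph.deployment.service_spec import MgmtGatewaySpec, PlacementSpec

spec = MgmtGatewaySpec(
    placement=PlacementSpec.from_string('ceph-node'),  # placeholder host
    port=5555,
    disable_https=False,
)
spec.validate()         # same check the CLI handler forces before applying
print(yaml.dump(spec))  # YAML suitable for `ceph orch apply -i <file>`
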
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index b91b62b02ac..5a294898fc5 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -765,6 +765,7 @@ class ServiceSpec(object):
         'elasticsearch',
         'grafana',
         'ingress',
+        'mgmt-gateway',
         'iscsi',
         'jaeger-agent',
         'jaeger-collector',
@@ -819,6 +820,7 @@ class ServiceSpec(object):
             'nvmeof': NvmeofServiceSpec,
             'alertmanager': AlertManagerSpec,
             'ingress': IngressSpec,
+            'mgmt-gateway': MgmtGatewaySpec,
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
@@ -1755,6 +1757,135 @@ class IngressSpec(ServiceSpec):
 yaml.add_representer(IngressSpec, ServiceSpec.yaml_representer)


+class MgmtGatewaySpec(ServiceSpec):
+    def __init__(self,
+                 service_type: str = 'mgmt-gateway',
+                 service_id: Optional[str] = None,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 disable_https: Optional[bool] = False,
+                 port: Optional[int] = None,
+                 ssl_certificate: Optional[str] = None,
+                 ssl_certificate_key: Optional[str] = None,
+                 ssl_prefer_server_ciphers: Optional[str] = None,
+                 ssl_session_tickets: Optional[str] = None,
+                 ssl_session_timeout: Optional[str] = None,
+                 ssl_session_cache: Optional[str] = None,
+                 server_tokens: Optional[str] = None,
+                 ssl_stapling: Optional[str] = None,
+                 ssl_stapling_verify: Optional[str] = None,
+                 ssl_protocols: Optional[List[str]] = None,
+                 ssl_ciphers: Optional[List[str]] = None,
+                 preview_only: bool = False,
+                 unmanaged: bool = False,
+                 extra_container_args: Optional[GeneralArgList] = None,
+                 extra_entrypoint_args: Optional[GeneralArgList] = None,
+                 custom_configs: Optional[List[CustomConfig]] = None,
+                 ):
+        assert service_type == 'mgmt-gateway'
+
+        super(MgmtGatewaySpec, self).__init__(
+            'mgmt-gateway', service_id=service_id,
+            placement=placement, config=config,
+            networks=networks,
+            preview_only=preview_only,
+            extra_container_args=extra_container_args,
+            extra_entrypoint_args=extra_entrypoint_args,
+            custom_configs=custom_configs
+        )
+        #: Flag to disable HTTPS. If True, the server will use insecure HTTP
+        self.disable_https = disable_https
+        #: The port number on which the server will listen
+        self.port = port
+        #: A multi-line string that contains the SSL certificate
+        self.ssl_certificate = ssl_certificate
+        #: A multi-line string that contains the SSL key
+        self.ssl_certificate_key = ssl_certificate_key
+        #: Prefer server ciphers over client ciphers: on | off
+        self.ssl_prefer_server_ciphers = ssl_prefer_server_ciphers
+        #: Flag to enable or disable SSL session tickets: on | off
+        self.ssl_session_tickets = ssl_session_tickets
+        #: The duration for SSL session timeout. Syntax: time (e.g.: 5m)
+        self.ssl_session_timeout = ssl_session_timeout
+        #: Duration an SSL/TLS session is cached: off | none | [builtin[:size]] [shared:name:size]
+        self.ssl_session_cache = ssl_session_cache
+        #: Flag to control server tokens in responses: on | off | build | string
+        self.server_tokens = server_tokens
+        #: Flag to enable or disable SSL stapling: on | off
+        self.ssl_stapling = ssl_stapling
+        #: Flag to control verification of SSL stapling: on | off
+        self.ssl_stapling_verify = ssl_stapling_verify
+        #: A list of supported SSL protocols (as supported by nginx)
+        self.ssl_protocols = ssl_protocols
+        #: List of supported secure SSL ciphers. Changing this list may reduce system security.
+        self.ssl_ciphers = ssl_ciphers
+
+    def get_port_start(self) -> List[int]:
+        ports = []
+        if self.port is not None:
+            ports.append(cast(int, self.port))
+        return ports
+
+    def validate(self) -> None:
+        super(MgmtGatewaySpec, self).validate()
+        self._validate_port(self.port)
+        self._validate_certificate(self.ssl_certificate, "ssl_certificate")
+        self._validate_private_key(self.ssl_certificate_key, "ssl_certificate_key")
+        self._validate_boolean_switch(self.ssl_prefer_server_ciphers, "ssl_prefer_server_ciphers")
+        self._validate_boolean_switch(self.ssl_session_tickets, "ssl_session_tickets")
+        self._validate_session_timeout(self.ssl_session_timeout)
+        self._validate_session_cache(self.ssl_session_cache)
+        self._validate_server_tokens(self.server_tokens)
+        self._validate_boolean_switch(self.ssl_stapling, "ssl_stapling")
+        self._validate_boolean_switch(self.ssl_stapling_verify, "ssl_stapling_verify")
+        self._validate_ssl_protocols(self.ssl_protocols)
+
+    def _validate_port(self, port: Optional[int]) -> None:
+        if port is not None and not (1 <= port <= 65535):
+            raise SpecValidationError(f"Invalid port: {port}. Must be between 1 and 65535.")
+
+    def _validate_certificate(self, cert: Optional[str], name: str) -> None:
+        if cert is not None and not isinstance(cert, str):
+            raise SpecValidationError(f"Invalid {name}. Must be a string.")
+
+    def _validate_private_key(self, key: Optional[str], name: str) -> None:
+        if key is not None and not isinstance(key, str):
+            raise SpecValidationError(f"Invalid {name}. Must be a string.")
+
+    def _validate_boolean_switch(self, value: Optional[str], name: str) -> None:
+        if value is not None and value not in ['on', 'off']:
+            raise SpecValidationError(f"Invalid {name}: {value}. Supported values: on | off.")
+
+    def _validate_session_timeout(self, timeout: Optional[str]) -> None:
+        if timeout is not None and not re.match(r'^\d+[smhd]$', timeout):
+            raise SpecValidationError(f"Invalid SSL Session Timeout: {timeout}. \
+                                      Value must be a number followed by 's', 'm', 'h', or 'd'.")
+
+    def _validate_session_cache(self, cache: Optional[str]) -> None:
+        valid_caches = ['none', 'off', 'builtin', 'shared']
+        if cache is not None and not any(cache.startswith(vc) for vc in valid_caches):
+            raise SpecValidationError(f"Invalid SSL Session Cache: {cache}. Supported values are: \
+                                      off | none | [builtin[:size]] [shared:name:size]")
+
+    def _validate_server_tokens(self, tokens: Optional[str]) -> None:
+        if tokens is not None and tokens not in ['on', 'off', 'build', 'string']:
+            raise SpecValidationError(f"Invalid Server Tokens: {tokens}. Must be one of \
+                                      ['on', 'off', 'build', 'string'].")
+
+    def _validate_ssl_protocols(self, protocols: Optional[List[str]]) -> None:
+        if protocols is None:
+            return
+        valid_protocols = ['TLSv1.2', 'TLSv1.3']
+        for protocol in protocols:
+            if protocol not in valid_protocols:
+                raise SpecValidationError(f"Invalid SSL Protocol: {protocol}. \
+                                          Must be one of {valid_protocols}.")
+
+
+yaml.add_representer(MgmtGatewaySpec, ServiceSpec.yaml_representer)
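
The validators above run at spec-parse time, so malformed values are rejected before cephadm ever tries to render an nginx configuration. A small usage sketch of what that looks like to a caller (assuming SpecValidationError is importable from ceph.deployment.service_spec, as it is for the other specs in this file):

# Illustrative only: trigger the spec-level validation added above.
from ceph.deployment.service_spec import MgmtGatewaySpec, SpecValidationError

try:
    MgmtGatewaySpec(port=70000, ssl_session_timeout='5x').validate()
except SpecValidationError as e:
    # the port check runs first in validate(), so the invalid port is reported
    print(f'rejected: {e}')
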
+
+
 class InitContainerSpec(object):
     """An init container is not a service that lives on its own, but rather
     is used to run and exit prior to a service container starting in order