mgr/cephadm: adding new cephadm service mgmt-gateway
author     Redouane Kachach <rkachach@ibm.com>
           Fri, 17 May 2024 13:19:35 +0000 (15:19 +0200)
committer  Redouane Kachach <rkachach@ibm.com>
           Tue, 9 Jul 2024 13:27:20 +0000 (15:27 +0200)
Add mgmt-gateway, a new cephadm service based on nginx that acts as the
front end and single entry point to the cluster. The gateway offers
unified access to all Ceph applications, including the Ceph dashboard
and the monitoring stack (Prometheus, Grafana, Alertmanager), while
enhancing security and simplifying access management through nginx.

Fixes: https://tracker.ceph.com/issues/66095
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
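
For context, a minimal deployment sketch (not part of this commit): the spec class below is added in service_spec.py and applied through the new CephadmOrchestrator.apply_mgmt_gateway() handler; the exact constructor keywords are an assumption, and only the fields read by the nginx templates (port, disable_https, ssl_certificate, ssl_certificate_key) appear in this diff.

from ceph.deployment.service_spec import MgmtGatewaySpec, PlacementSpec

# Hypothetical spec; module.py defaults to placing a single mgmt-gateway daemon.
spec = MgmtGatewaySpec(
    placement=PlacementSpec(count=1),
    port=8443,             # external listen port (templates fall back to 443/80)
    disable_https=False,   # keep the external server on TLS
    # ssl_certificate / ssl_certificate_key are optional; when omitted the
    # service generates self-signed certificates (see get_certificates()).
)
# mgr = CephadmOrchestrator instance; applying follows the usual _apply() path:
# mgr.apply_mgmt_gateway(spec)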
20 files changed:
src/cephadm/cephadm.py
src/cephadm/cephadmlib/constants.py
src/cephadm/cephadmlib/daemons/__init__.py
src/cephadm/cephadmlib/daemons/mgmt_gateway.py [new file with mode: 0644]
src/cephadm/cephadmlib/daemons/monitoring.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/services/mgmt_gateway.py [new file with mode: 0644]
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_services.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/python-common/ceph/deployment/service_spec.py

index 8949f1ce6bf618cf6f4c9ccf17914f1d521a0567..95169358f3a71b0b4f4151a7842dda1fb2db495f 100755 (executable)
@@ -176,6 +176,7 @@ from cephadmlib.daemons import (
     NFSGanesha,
     SMB,
     SNMPGateway,
+    MgmtGateway,
     Tracing,
     NodeProxy,
 )
@@ -227,6 +228,7 @@ def get_supported_daemons():
     supported_daemons.append(Keepalived.daemon_type)
     supported_daemons.append(CephadmAgent.daemon_type)
     supported_daemons.append(SNMPGateway.daemon_type)
+    supported_daemons.append(MgmtGateway.daemon_type)
     supported_daemons.extend(Tracing.components)
     supported_daemons.append(NodeProxy.daemon_type)
     supported_daemons.append(SMB.daemon_type)
@@ -463,6 +465,8 @@ def update_default_image(ctx: CephadmContext) -> None:
             ctx.image = Keepalived.default_image
         if type_ == SNMPGateway.daemon_type:
             ctx.image = SNMPGateway.default_image
+        if type_ == MgmtGateway.daemon_type:
+            ctx.image = MgmtGateway.default_image
         if type_ == CephNvmeof.daemon_type:
             ctx.image = CephNvmeof.default_image
         if type_ in Tracing.components:
@@ -855,6 +859,10 @@ def create_daemon_dirs(
         sg = SNMPGateway.init(ctx, fsid, ident.daemon_id)
         sg.create_daemon_conf()
 
+    elif daemon_type == MgmtGateway.daemon_type:
+        cg = MgmtGateway.init(ctx, fsid, ident.daemon_id)
+        cg.create_daemon_dirs(data_dir, uid, gid)
+
     elif daemon_type == NodeProxy.daemon_type:
         node_proxy = NodeProxy.init(ctx, fsid, ident.daemon_id)
         node_proxy.create_daemon_dirs(data_dir, uid, gid)
@@ -3571,6 +3579,9 @@ def list_daemons(
                                 elif daemon_type == SNMPGateway.daemon_type:
                                     version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                     seen_versions[image_id] = version
+                                elif daemon_type == MgmtGateway.daemon_type:
+                                    version = MgmtGateway.get_version(ctx, container_id)
+                                    seen_versions[image_id] = version
                                 else:
                                     logger.warning('version for unknown daemon type %s' % daemon_type)
                         else:
index a6cf4389ff619942585cf662999042b8c45195c4..41add9fd10dac46e84ea24b1aa642475fe325c76 100644 (file)
@@ -19,6 +19,7 @@ DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
 DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
 DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
 DEFAULT_SMB_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64'
+DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1'
 DEFAULT_REGISTRY = 'docker.io'  # normalize unqualified digests to this
 # ------------------------------------------------------------------------------
 
index 1a9d2d568bcfc639f3b481c71b97f91250ac3a5f..279f6f1a898b7a4d85c4aa484d18d70fc2771933 100644 (file)
@@ -9,6 +9,7 @@ from .smb import SMB
 from .snmp import SNMPGateway
 from .tracing import Tracing
 from .node_proxy import NodeProxy
+from .mgmt_gateway import MgmtGateway
 
 __all__ = [
     'Ceph',
@@ -25,4 +26,5 @@ __all__ = [
     'SNMPGateway',
     'Tracing',
     'NodeProxy',
+    'MgmtGateway',
 ]
diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py
new file mode 100644 (file)
index 0000000..f3b28bc
--- /dev/null
@@ -0,0 +1,174 @@
+import logging
+import os
+from typing import Dict, List, Tuple, Optional
+import re
+
+from ..call_wrappers import call, CallVerbosity
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer
+from ..context import CephadmContext
+from ..context_getters import fetch_configs
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..deployment_utils import to_deployment_container
+from ..constants import DEFAULT_NGINX_IMAGE
+from ..data_utils import dict_get, is_fsid
+from ..file_utils import populate_files, makedirs, recursive_chown
+from ..exceptions import Error
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class MgmtGateway(ContainerDaemonForm):
+    """Defines an MgmtGateway container"""
+
+    daemon_type = 'mgmt-gateway'
+    required_files = [
+        'nginx.conf',
+        'nginx_external_server.conf',
+        'nginx_internal_server.conf',
+        'nginx_internal.crt',
+        'nginx_internal.key',
+    ]
+
+    default_image = DEFAULT_NGINX_IMAGE
+
+    @classmethod
+    def for_daemon_type(cls, daemon_type: str) -> bool:
+        return cls.daemon_type == daemon_type
+
+    def __init__(
+        self,
+        ctx: CephadmContext,
+        fsid: str,
+        daemon_id: str,
+        config_json: Dict,
+        image: str = DEFAULT_NGINX_IMAGE,
+    ):
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+        self.files = dict_get(config_json, 'files', {})
+        self.validate()
+
+    @classmethod
+    def init(
+        cls, ctx: CephadmContext, fsid: str, daemon_id: str
+    ) -> 'MgmtGateway':
+        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+    @classmethod
+    def create(
+        cls, ctx: CephadmContext, ident: DaemonIdentity
+    ) -> 'MgmtGateway':
+        return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+    @property
+    def identity(self) -> DaemonIdentity:
+        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+    def validate(self) -> None:
+        if not is_fsid(self.fsid):
+            raise Error(f'not an fsid: {self.fsid}')
+        if not self.daemon_id:
+            raise Error(f'invalid daemon_id: {self.daemon_id}')
+        if not self.image:
+            raise Error(f'invalid image: {self.image}')
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error(
+                        'required file missing from config-json: %s' % fname
+                    )
+
+    def container(self, ctx: CephadmContext) -> CephContainer:
+        ctr = daemon_to_container(ctx, self)
+        return to_deployment_container(ctx, ctr)
+
+    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+        return 65534, 65534  # nobody, nobody
+
+    def get_daemon_args(self) -> List[str]:
+        return []
+
+    def default_entrypoint(self) -> str:
+        return ''
+
+    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+        logger.info('Writing mgmt-gateway config...')
+        config_dir = os.path.join(data_dir, 'etc/')
+        makedirs(config_dir, uid, gid, 0o755)
+        recursive_chown(config_dir, uid, gid)
+        populate_files(config_dir, self.files, uid, gid)
+
+    def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+        mounts: Dict[str, str] = {}
+        mounts[
+            os.path.join(data_dir, 'nginx.conf')
+        ] = '/etc/nginx/nginx.conf:Z'
+        return mounts
+
+    @staticmethod
+    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
+        """Return the version of the Nginx container"""
+        version = None
+        out, err, code = call(
+            ctx,
+            [
+                ctx.container_engine.path,
+                'exec',
+                container_id,
+                'nginx',
+                '-v',
+            ],
+            verbosity=CallVerbosity.QUIET,
+        )
+        if code == 0:
+            # note: nginx prints its version to stderr
+            match = re.search(r'nginx version:\s*nginx\/(.+)', err)
+            if match:
+                version = match.group(1)
+        return version
+
+    def customize_container_mounts(
+        self, ctx: CephadmContext, mounts: Dict[str, str]
+    ) -> None:
+        data_dir = self.identity.data_dir(ctx.data_dir)
+        mounts.update(
+            {
+                os.path.join(
+                    data_dir, 'etc/nginx.conf'
+                ): '/etc/nginx/nginx.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal_server.conf'
+                ): '/etc/nginx_internal_server.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_external_server.conf'
+                ): '/etc/nginx_external_server.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal.crt'
+                ): '/etc/nginx/ssl/nginx_internal.crt:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal.key'
+                ): '/etc/nginx/ssl/nginx_internal.key:Z',
+            }
+        )
+
+        if 'nginx.crt' in self.files:
+            mounts.update(
+                {
+                    os.path.join(
+                        data_dir, 'etc/nginx.crt'
+                    ): '/etc/nginx/ssl/nginx.crt:Z',
+                    os.path.join(
+                        data_dir, 'etc/nginx.key'
+                    ): '/etc/nginx/ssl/nginx.key:Z',
+                }
+            )
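
A rough illustration of the daemon-form contract above (a sketch, not part of the commit): cephadm expects a config-json whose 'files' map contains every entry in required_files, and get_version() parses the banner that nginx prints on stderr.

import re

# nginx writes its version banner to stderr, e.g. "nginx version: nginx/1.26.1"
err = 'nginx version: nginx/1.26.1'
match = re.search(r'nginx version:\s*nginx\/(.+)', err)
assert match and match.group(1) == '1.26.1'

# Minimal config-json accepted by MgmtGateway.validate(); the file contents
# are placeholders here, the real ones are rendered by the mgr module.
config_json = {
    'files': {
        'nginx.conf': '...',
        'nginx_external_server.conf': '...',
        'nginx_internal_server.conf': '...',
        'nginx_internal.crt': '...',
        'nginx_internal.key': '...',
    }
}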
index aa93ebe7305e0ba836bd4880a7433b3bb3d8b303..55e6a9458f0ffbf6681170a396a63f79286811e2 100644 (file)
@@ -260,6 +260,7 @@ class Monitoring(ContainerDaemonForm):
                 retention_size = config.get(
                     'retention_size', '0'
                 )  # default to disabled
+                use_url_prefix = config.get('use_url_prefix', False)
                 r += [f'--storage.tsdb.retention.time={retention_time}']
                 r += [f'--storage.tsdb.retention.size={retention_size}']
                 scheme = 'http'
@@ -271,10 +272,17 @@ class Monitoring(ContainerDaemonForm):
                     # use the first ipv4 (if any) otherwise use the first ipv6
                     addr = next(iter(ipv4_addrs or ipv6_addrs), None)
                     host = wrap_ipv6(addr) if addr else host
-                r += [f'--web.external-url={scheme}://{host}:{port}']
+                if use_url_prefix:
+                    r += [
+                        f'--web.external-url={scheme}://{host}:{port}/prometheus'
+                    ]
+                    r += ['--web.route-prefix=/prometheus/']
+                else:
+                    r += [f'--web.external-url={scheme}://{host}:{port}']
             r += [f'--web.listen-address={ip}:{port}']
         if daemon_type == 'alertmanager':
             config = fetch_configs(ctx)
+            use_url_prefix = config.get('use_url_prefix', False)
             peers = config.get('peers', list())  # type: ignore
             for peer in peers:
                 r += ['--cluster.peer={}'.format(peer)]
@@ -284,6 +292,8 @@ class Monitoring(ContainerDaemonForm):
                 pass
             # some alertmanager, by default, look elsewhere for a config
             r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+            if use_url_prefix:
+                r += ['--web.route-prefix=/alertmanager']
         if daemon_type == 'promtail':
             r += ['--config.expand-env']
         if daemon_type == 'prometheus':
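
To make the effect of the new use_url_prefix flag concrete, a small sketch with assumed host/port values: when a mgmt-gateway is present, Prometheus is asked to serve under /prometheus (and Alertmanager under /alertmanager) so the gateway can route requests by URL path.

scheme, host, port = 'http', 'host01', 9095   # assumed values
use_url_prefix = True                          # set when a mgmt-gateway exists

args = []
if use_url_prefix:
    args += [f'--web.external-url={scheme}://{host}:{port}/prometheus']
    args += ['--web.route-prefix=/prometheus/']
else:
    args += [f'--web.external-url={scheme}://{host}:{port}']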
index b86b6ff80b901782d6958aad30bdfb109a2302cb..492a9a98d34dae6160cf793471e37d1b0c3b62b2 100644 (file)
@@ -1889,6 +1889,7 @@ class CertKeyStore():
             'iscsi_ssl_cert': {},  # service-name -> cert
             'ingress_ssl_cert': {},  # service-name -> cert
             'agent_endpoint_root_cert': Cert(),  # cert
+            'mgmt_gw_root_cert': Cert(),  # cert
             'service_discovery_root_cert': Cert(),  # cert
             'grafana_cert': {},  # host -> cert
             'alertmanager_cert': {},  # host -> cert
@@ -1901,6 +1902,7 @@ class CertKeyStore():
         self.known_keys = {
             'agent_endpoint_key': PrivKey(),  # key
             'service_discovery_key': PrivKey(),  # key
+            'mgmt_gw_root_key': PrivKey(),  # key
             'grafana_key': {},  # host -> key
             'alertmanager_key': {},  # host -> key
             'prometheus_key': {},  # host -> key
index b68d571de68cc5662d709d005d90cf9b84d828fe..bd7981edc56f1cc6d94c8b3bdca4f9ef43f71dff 100644 (file)
@@ -68,6 +68,7 @@ from .services.ingress import IngressService
 from .services.container import CustomContainerService
 from .services.iscsi import IscsiService
 from .services.nvmeof import NvmeofService
+from .services.mgmt_gateway import MgmtGatewayService
 from .services.nfs import NFSService
 from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError
 from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
@@ -139,6 +140,7 @@ DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
 DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
 DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
 DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
+DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1'
 DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
 DEFAULT_SAMBA_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64'
 # ------------------------------------------------------------------------------
@@ -277,6 +279,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             default=DEFAULT_SNMP_GATEWAY_IMAGE,
             desc='SNMP Gateway container image',
         ),
+        Option(
+            'container_image_nginx',
+            default=DEFAULT_NGINX_IMAGE,
+            desc='Nginx container image',
+        ),
         Option(
             'container_image_elasticsearch',
             default=DEFAULT_ELASTICSEARCH_IMAGE,
@@ -562,6 +569,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.container_image_haproxy = ''
             self.container_image_keepalived = ''
             self.container_image_snmp_gateway = ''
+            self.container_image_nginx = ''
             self.container_image_elasticsearch = ''
             self.container_image_jaeger_agent = ''
             self.container_image_jaeger_collector = ''
@@ -708,6 +716,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             RgwService,
             SMBService,
             SNMPGatewayService,
+            MgmtGatewayService,
         ]
 
         # https://github.com/python/mypy/issues/8993
@@ -918,7 +927,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             'mon', 'crash', 'ceph-exporter', 'node-proxy',
             'prometheus', 'node-exporter', 'grafana', 'alertmanager',
             'container', 'agent', 'snmp-gateway', 'loki', 'promtail',
-            'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query'
+            'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query', 'mgmt-gateway'
         ]
         if forcename:
             if len([d for d in existing if d.daemon_id == forcename]):
@@ -1650,6 +1659,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
                 'prometheus': self.container_image_prometheus,
                 'promtail': self.container_image_promtail,
                 'snmp-gateway': self.container_image_snmp_gateway,
+                'mgmt-gateway': self.container_image_nginx,
                 # The image can't be resolved here, the necessary information
                 # is only available when a container is deployed (given
                 # via spec).
@@ -2926,17 +2936,18 @@ Then run the following:
                 deps.append('ingress')
             # add dependency on ceph-exporter daemons
             deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
+            deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')]
             if self.secure_monitoring_stack:
                 if prometheus_user and prometheus_password:
                     deps.append(f'{hash(prometheus_user + prometheus_password)}')
                 if alertmanager_user and alertmanager_password:
                     deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'grafana':
-            deps += get_daemon_names(['prometheus', 'loki'])
+            deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway'])
             if self.secure_monitoring_stack and prometheus_user and prometheus_password:
                 deps.append(f'{hash(prometheus_user + prometheus_password)}')
         elif daemon_type == 'alertmanager':
-            deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway'])
+            deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway'])
             if self.secure_monitoring_stack and alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'promtail':
@@ -2947,11 +2958,15 @@ Then run the following:
                 port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
                 deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
             deps = sorted(deps)
+        elif daemon_type == 'mgmt-gateway':
+            # url_prefix for monitoring daemons depends on the presence of mgmt-gateway
+            # while dashboard urls depend on the mgr daemons
+            deps += get_daemon_names(['mgr', 'grafana', 'prometheus', 'alertmanager'])
         else:
-            # TODO(redo): some error message!
+            # this daemon type does not need dependency management
             pass
 
-        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']:
+        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']:
             deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
 
         return sorted(deps)
@@ -3337,6 +3352,7 @@ Then run the following:
                 'crash': PlacementSpec(host_pattern='*'),
                 'container': PlacementSpec(count=1),
                 'snmp-gateway': PlacementSpec(count=1),
+                'mgmt-gateway': PlacementSpec(count=1),
                 'elasticsearch': PlacementSpec(count=1),
                 'jaeger-agent': PlacementSpec(host_pattern='*'),
                 'jaeger-collector': PlacementSpec(count=1),
@@ -3475,6 +3491,10 @@ Then run the following:
     def apply_smb(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
 
+    @handle_orch_error
+    def apply_mgmt_gateway(self, spec: ServiceSpec) -> str:
+        return self._apply(spec)
+
     @handle_orch_error
     def set_unmanaged(self, service_name: str, value: bool) -> str:
         return self.spec_store.set_unmanaged(service_name, value)
index 561a3e085b8633af0170934f699d42ae0863f78f..59e06bbd024e6338704bcad4f78a6d87c4691533 100644 (file)
@@ -41,7 +41,7 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof']
+REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof', 'mgmt-gateway']
 
 WHICH = ssh.RemoteExecutable('which')
 CEPHADM_EXE = ssh.RemoteExecutable('/usr/bin/cephadm')
@@ -1093,10 +1093,12 @@ class CephadmServe:
                 self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}')
                 self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...')
                 action = 'reconfig'
-                # we need only redeploy if secure_monitoring_stack value has changed:
+                # we only need to redeploy if a secure_monitoring_stack or mgmt-gateway dependency has changed:
+                # TODO(redo): check whether we should always redeploy (it's fast enough)
                 if dd.daemon_type in ['prometheus', 'node-exporter', 'alertmanager']:
                     diff = list(set(last_deps).symmetric_difference(set(deps)))
-                    if any('secure_monitoring_stack' in e for e in diff):
+                    REDEPLOY_TRIGGERS = ['secure_monitoring_stack', 'mgmt-gateway']
+                    if any(svc in e for e in diff for svc in REDEPLOY_TRIGGERS):
                         action = 'redeploy'
                 elif dd.daemon_type == 'jaeger-agent':
                     # changes to jaeger-agent deps affect the way the unit.run for
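
A tiny sketch of the dependency-diff check above, with made-up dependency lists: any changed dependency that mentions mgmt-gateway (or secure_monitoring_stack) upgrades the action from reconfig to redeploy.

last_deps = ['mgr.a', 'secure_monitoring_stack:False']
deps = ['mgr.a', 'mgmt-gateway.host01.xyz', 'secure_monitoring_stack:False']

action = 'reconfig'
diff = list(set(last_deps).symmetric_difference(set(deps)))
REDEPLOY_TRIGGERS = ['secure_monitoring_stack', 'mgmt-gateway']
if any(svc in e for e in diff for svc in REDEPLOY_TRIGGERS):
    action = 'redeploy'
assert action == 'redeploy'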
index 4b22400b49ef0849782e3187a62dda51fa2e0f9d..ec9df98413a3d69fc3adaaaeac1aa4b7630246f6 100644 (file)
@@ -5,6 +5,8 @@ import re
 import socket
 import time
 from abc import ABCMeta, abstractmethod
+import ipaddress
+from urllib.parse import urlparse
 from typing import TYPE_CHECKING, List, Callable, TypeVar, \
     Optional, Dict, Any, Tuple, NewType, cast
 
@@ -73,6 +75,61 @@ def simplified_keyring(entity: str, contents: str) -> str:
     return keyring
 
 
+def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[str]]:
+    dashboard_endpoints: List[str] = []
+    port = None
+    protocol = None
+    mgr_map = svc.mgr.get('mgr_map')
+    url = mgr_map.get('services', {}).get('dashboard', None)
+    if url:
+        p_result = urlparse(url.rstrip('/'))
+        protocol = p_result.scheme
+        port = p_result.port
+        # assume that they are all dashboards on the same port as the active mgr.
+        for dd in svc.mgr.cache.get_daemons_by_service('mgr'):
+            if not port:
+                continue
+            assert dd.hostname is not None
+            addr = svc._inventory_get_fqdn(dd.hostname)
+            dashboard_endpoints.append(f'{addr}:{port}')
+
+    return dashboard_endpoints, protocol
+
+
+def get_dashboard_urls(svc: 'CephadmService') -> List[str]:
+    # dashboard(s)
+    dashboard_urls: List[str] = []
+    mgr_map = svc.mgr.get('mgr_map')
+    port = None
+    proto = None  # http: or https:
+    url = mgr_map.get('services', {}).get('dashboard', None)
+    if url:
+        p_result = urlparse(url.rstrip('/'))
+        hostname = socket.getfqdn(p_result.hostname)
+        try:
+            ip = ipaddress.ip_address(hostname)
+        except ValueError:
+            pass
+        else:
+            if ip.version == 6:
+                hostname = f'[{hostname}]'
+        dashboard_urls.append(f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
+        proto = p_result.scheme
+        port = p_result.port
+
+    # assume that they are all dashboards on the same port as the active mgr.
+    for dd in svc.mgr.cache.get_daemons_by_service('mgr'):
+        if not port:
+            continue
+        if dd.daemon_id == svc.mgr.get_mgr_id():
+            continue
+        assert dd.hostname is not None
+        addr = svc._inventory_get_fqdn(dd.hostname)
+        dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
+
+    return dashboard_urls
+
+
 class CephadmDaemonDeploySpec:
     # typing.NamedTuple + Generic is broken in py36
     def __init__(self, host: str, daemon_id: str,
@@ -336,22 +393,21 @@ class CephadmService(metaclass=ABCMeta):
         addr = self.mgr.inventory.get_addr(hostname)
         return socket.getfqdn(addr)
 
-    def _set_service_url_on_dashboard(self,
-                                      service_name: str,
-                                      get_mon_cmd: str,
-                                      set_mon_cmd: str,
-                                      service_url: str) -> None:
-        """A helper to get and set service_url via Dashboard's MON command.
-
-           If result of get_mon_cmd differs from service_url, set_mon_cmd will
+    def _set_value_on_dashboard(self,
+                                service_name: str,
+                                get_mon_cmd: str,
+                                set_mon_cmd: str,
+                                new_value: str) -> None:
+        """A helper to get and set values via Dashboard's MON command.
+           If result of get_mon_cmd differs from the new_value, set_mon_cmd will
            be sent to set the service_url.
         """
         def get_set_cmd_dicts(out: str) -> List[dict]:
             cmd_dict = {
                 'prefix': set_mon_cmd,
-                'value': service_url
+                'value': new_value
             }
-            return [cmd_dict] if service_url != out else []
+            return [cmd_dict] if new_value != out else []
 
         self._check_and_set_dashboard(
             service_name=service_name,
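
Illustration of the new get_dashboard_endpoints() helper with an assumed dashboard URL: the scheme and port come from the active mgr's URL in the mgr map, and every other mgr host is assumed to serve the dashboard on the same port.

from urllib.parse import urlparse

url = 'https://ceph-node-00:8443/'               # assumed mgr_map dashboard URL
p_result = urlparse(url.rstrip('/'))
protocol, port = p_result.scheme, p_result.port  # 'https', 8443
mgr_hosts = ['ceph-node-00', 'ceph-node-01']     # assumed mgr host FQDNs
dashboard_endpoints = [f'{h}:{port}' for h in mgr_hosts]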
diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py
new file mode 100644 (file)
index 0000000..7ba59fa
--- /dev/null
@@ -0,0 +1,146 @@
+import logging
+from typing import List, Any, Tuple, Dict, cast
+
+from orchestrator import DaemonDescription
+from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec
+from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints
+from cephadm.ssl_cert_utils import SSLCerts
+
+logger = logging.getLogger(__name__)
+
+
+class MgmtGatewayService(CephadmService):
+    TYPE = 'mgmt-gateway'
+    SVC_TEMPLATE_PATH = 'services/mgmt-gateway/nginx.conf.j2'
+    EXTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/external_server.conf.j2'
+    INTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/internal_server.conf.j2'
+    INTERNAL_SERVICE_PORT = 29443
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def get_service_endpoints(self, service_name: str) -> List[str]:
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_service(service_name):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else None
+            srv_entries.append(f'{addr}:{port}')
+        return srv_entries
+
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        if daemon_descrs:
+            return daemon_descrs[0]
+        # if empty list provided, return empty Daemon Desc
+        return DaemonDescription()
+
+    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+        # we adjust the standby behaviour so the reverse proxy can correctly pick the active instance
+        self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '503')
+        self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'error')
+
+    def get_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str, str, str]:
+        self.ssl_certs = SSLCerts()
+        old_cert = self.mgr.cert_key_store.get_cert('mgmt_gw_root_cert')
+        old_key = self.mgr.cert_key_store.get_key('mgmt_gw_root_key')
+        if old_cert and old_key:
+            self.ssl_certs.load_root_credentials(old_cert, old_key)
+        else:
+            self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip())
+            self.mgr.cert_key_store.save_cert('mgmt_gw_root_cert', self.ssl_certs.get_root_cert())
+            self.mgr.cert_key_store.save_key('mgmt_gw_root_key', self.ssl_certs.get_root_key())
+
+        node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+        internal_cert, internal_pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip)
+        cert = svc_spec.ssl_certificate
+        pkey = svc_spec.ssl_certificate_key
+        if not (cert and pkey):
+            # If the user has not provided certificates, we generate self-signed ones
+            cert, pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip)
+
+        return internal_cert, internal_pkey, cert, pkey
+
+    def get_mgmt_gateway_deps(self) -> List[str]:
+        # url_prefix for the following services depends on the presence of mgmt-gateway
+        deps: List[str] = []
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('prometheus')]
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')]
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')]
+        # secure_monitoring_stack affects the protocol used by monitoring services
+        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
+        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            # we consider mgr a dep even if the dashboard is disabled
+            # in order to be consistent with _calc_daemon_deps().
+            deps.append(dd.name())
+
+        return deps
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        svc_spec = cast(MgmtGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        dashboard_endpoints, dashboard_scheme = get_dashboard_endpoints(self)
+        scheme = 'https' if self.mgr.secure_monitoring_stack else 'http'
+
+        prometheus_endpoints = self.get_service_endpoints('prometheus')
+        alertmanager_endpoints = self.get_service_endpoints('alertmanager')
+        grafana_endpoints = self.get_service_endpoints('grafana')
+        try:
+            grafana_spec = cast(GrafanaSpec, self.mgr.spec_store['grafana'].spec)
+            grafana_protocol = grafana_spec.protocol
+        except Exception:
+            grafana_protocol = 'https'  # default to https just for UT
+
+        main_context = {
+            'dashboard_endpoints': dashboard_endpoints,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+        external_server_context = {
+            'spec': svc_spec,
+            'dashboard_scheme': dashboard_scheme,
+            'grafana_scheme': grafana_protocol,
+            'prometheus_scheme': scheme,
+            'alertmanager_scheme': scheme,
+            'dashboard_endpoints': dashboard_endpoints,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+        internal_server_context = {
+            'spec': svc_spec,
+            'internal_port': self.INTERNAL_SERVICE_PORT,
+            'grafana_scheme': grafana_protocol,
+            'prometheus_scheme': scheme,
+            'alertmanager_scheme': scheme,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+
+        internal_cert, internal_pkey, cert, pkey = self.get_certificates(svc_spec, daemon_spec)
+        daemon_config = {
+            "files": {
+                "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context),
+                "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, external_server_context),
+                "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, internal_server_context),
+                "nginx_internal.crt": internal_cert,
+                "nginx_internal.key": internal_pkey
+            }
+        }
+        if not svc_spec.disable_https:
+            daemon_config["files"]["nginx.crt"] = cert
+            daemon_config["files"]["nginx.key"] = pkey
+
+        return daemon_config, sorted(self.get_mgmt_gateway_deps())
+
+    def pre_remove(self, daemon: DaemonDescription) -> None:
+        """
+        Called before mgmt-gateway daemon is removed.
+        """
+        # reset the standby dashboard redirection behaviour
+        self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '500')
+        self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'redirect')
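
The deploy-time payload produced by generate_config() then has roughly this shape (a sketch with placeholder contents; nginx.crt/nginx.key are only added when spec.disable_https is false):

daemon_config = {
    'files': {
        'nginx.conf': '<rendered nginx.conf.j2>',
        'nginx_external_server.conf': '<rendered external_server.conf.j2>',
        'nginx_internal_server.conf': '<rendered internal_server.conf.j2>',
        'nginx_internal.crt': '<cert signed by the stored mgmt_gw_root_cert>',
        'nginx_internal.key': '<matching private key>',
        # only when HTTPS is enabled on the external server:
        'nginx.crt': '<user-provided or self-signed certificate>',
        'nginx.key': '<matching private key>',
    }
}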
index c12c637c39d172c1786e6cc56351d8e6fa47d42a..71a9b60d31f23dde4c1e20683969ec58071f5807 100644 (file)
@@ -1,17 +1,16 @@
 import errno
-import ipaddress
 import logging
 import os
 import socket
 from typing import List, Any, Tuple, Dict, Optional, cast
-from urllib.parse import urlparse
 
 from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
-    SNMPGatewaySpec, PrometheusSpec
-from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
+    SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec
+from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls
+from cephadm.services.mgmt_gateway import MgmtGatewayService
 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash
 from ceph.deployment.utils import wrap_ipv6
 
@@ -35,6 +34,9 @@ class GrafanaService(CephadmService):
             deps.append(f'{hash(prometheus_user + prometheus_password)}')
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
 
+        # add a dependency since url_prefix depends on the existence of mgmt-gateway
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+
         prom_services = []  # type: List[str]
         for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
             assert dd.hostname is not None
@@ -78,13 +80,15 @@ class GrafanaService(CephadmService):
                 daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
                 grafana_ip = ip_to_bind_to
 
+        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         grafana_ini = self.mgr.template.render(
             'services/grafana/grafana.ini.j2', {
                 'anonymous_access': spec.anonymous_access,
                 'initial_admin_password': spec.initial_admin_password,
                 'http_port': grafana_port,
                 'protocol': spec.protocol,
-                'http_addr': grafana_ip
+                'http_addr': grafana_ip,
+                'use_url_prefix': mgmt_gw_enabled
             })
 
         if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password:
@@ -187,13 +191,36 @@ class GrafanaService(CephadmService):
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec)
-        service_url = build_url(scheme=spec.protocol, host=addr, port=port)
-        self._set_service_url_on_dashboard(
-            'Grafana',
-            'dashboard get-grafana-api-url',
-            'dashboard set-grafana-api-url',
-            service_url
-        )
+
+        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
+        if mgmt_gw_daemons:
+            dd = mgmt_gw_daemons[0]
+            assert dd.hostname is not None
+            mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec)
+            mgmt_gw_port = dd.ports[0] if dd.ports else None
+            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
+            protocol = 'http' if mgmt_gw_spec.disable_https else 'https'
+            mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port)
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-url',
+                'dashboard set-grafana-api-url',
+                f'{mgmt_gw_external_endpoint}/grafana'
+            )
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-ssl-verify',
+                'dashboard set-grafana-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=spec.protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-url',
+                'dashboard set-grafana-api-url',
+                service_url
+            )
 
     def pre_remove(self, daemon: DaemonDescription) -> None:
         """
@@ -240,44 +267,15 @@ class AlertmanagerService(CephadmService):
                 user_data['default_webhook_urls'], list):
             default_webhook_urls.extend(user_data['default_webhook_urls'])
 
-        # dashboard(s)
-        dashboard_urls: List[str] = []
-        snmp_gateway_urls: List[str] = []
-        mgr_map = self.mgr.get('mgr_map')
-        port = None
-        proto = None  # http: or https:
-        url = mgr_map.get('services', {}).get('dashboard', None)
-        if url:
-            p_result = urlparse(url.rstrip('/'))
-            hostname = socket.getfqdn(p_result.hostname)
-
-            try:
-                ip = ipaddress.ip_address(hostname)
-            except ValueError:
-                pass
-            else:
-                if ip.version == 6:
-                    hostname = f'[{hostname}]'
-
-            dashboard_urls.append(
-                f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
-            proto = p_result.scheme
-            port = p_result.port
-
+        # add a dependency since url_prefix depends on the existence of mgmt-gateway
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
         # scan all mgrs to generate deps and to get standbys too.
-        # assume that they are all on the same port as the active mgr.
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
             # we consider mgr a dep even if the dashboard is disabled
             # in order to be consistent with _calc_daemon_deps().
             deps.append(dd.name())
-            if not port:
-                continue
-            if dd.daemon_id == self.mgr.get_mgr_id():
-                continue
-            assert dd.hostname is not None
-            addr = self._inventory_get_fqdn(dd.hostname)
-            dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
 
+        snmp_gateway_urls: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
             assert dd.hostname is not None
             assert dd.ports
@@ -289,7 +287,7 @@ class AlertmanagerService(CephadmService):
 
         context = {
             'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
-            'dashboard_urls': dashboard_urls,
+            'dashboard_urls': get_dashboard_urls(self),
             'default_webhook_urls': default_webhook_urls,
             'snmp_gateway_urls': snmp_gateway_urls,
             'secure': secure,
@@ -304,8 +302,8 @@ class AlertmanagerService(CephadmService):
             addr = self._inventory_get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
+        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
-
         if self.mgr.secure_monitoring_stack:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
             if alertmanager_user and alertmanager_password:
@@ -332,14 +330,16 @@ class AlertmanagerService(CephadmService):
                     'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
                 },
                 'peers': peers,
-                'web_config': '/etc/alertmanager/web.yml'
+                'web_config': '/etc/alertmanager/web.yml',
+                'use_url_prefix': mgmt_gw_enabled
             }, sorted(deps)
         else:
             return {
                 "files": {
                     "alertmanager.yml": yml
                 },
-                "peers": peers
+                "peers": peers,
+                'use_url_prefix': mgmt_gw_enabled
             }, sorted(deps)
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
@@ -355,13 +355,33 @@ class AlertmanagerService(CephadmService):
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
-        service_url = build_url(scheme=protocol, host=addr, port=port)
-        self._set_service_url_on_dashboard(
-            'AlertManager',
-            'dashboard get-alertmanager-api-host',
-            'dashboard set-alertmanager-api-host',
-            service_url
-        )
+
+        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
+        if mgmt_gw_daemons:
+            dd = mgmt_gw_daemons[0]
+            assert dd.hostname is not None
+            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
+            mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+            self._set_value_on_dashboard(
+                'AlertManager',
+                'dashboard get-alertmanager-api-host',
+                'dashboard set-alertmanager-api-host',
+                f'{mgmt_gw_internal_endpoint}/internal/alertmanager'
+            )
+            self._set_value_on_dashboard(
+                'Alertmanager',
+                'dashboard get-alertmanager-api-ssl-verify',
+                'dashboard set-alertmanager-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'AlertManager',
+                'dashboard get-alertmanager-api-host',
+                'dashboard set-alertmanager-api-host',
+                service_url
+            )
 
     def pre_remove(self, daemon: DaemonDescription) -> None:
         """
@@ -481,6 +501,7 @@ class PrometheusService(CephadmService):
             'prometheus_web_password': password_hash(prometheus_password),
         }
 
+        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         if self.mgr.secure_monitoring_stack:
             # NOTE: this prometheus root cert is managed by the prometheus module
             # we are using it in a read only fashion in the cephadm module
@@ -510,7 +531,8 @@ class PrometheusService(CephadmService):
                     'retention_time': retention_time,
                     'retention_size': retention_size,
                     'ip_to_bind_to': ip_to_bind_to,
-                    'web_config': '/etc/prometheus/web.yml'
+                    'web_config': '/etc/prometheus/web.yml',
+                    'use_url_prefix': mgmt_gw_enabled
                 }
         else:
             r = {
@@ -519,7 +541,8 @@ class PrometheusService(CephadmService):
                 },
                 'retention_time': retention_time,
                 'retention_size': retention_size,
-                'ip_to_bind_to': ip_to_bind_to
+                'ip_to_bind_to': ip_to_bind_to,
+                'use_url_prefix': mgmt_gw_enabled
             }
 
         # include alerts, if present in the container
@@ -563,6 +586,10 @@ class PrometheusService(CephadmService):
             if alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+
+        # add a dependency since url_prefix depends on the existence of mgmt-gateway
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+
         # add dependency on ceph-exporter daemons
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')]
         deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)]
@@ -583,13 +610,33 @@ class PrometheusService(CephadmService):
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
-        service_url = build_url(scheme=protocol, host=addr, port=port)
-        self._set_service_url_on_dashboard(
-            'Prometheus',
-            'dashboard get-prometheus-api-host',
-            'dashboard set-prometheus-api-host',
-            service_url
-        )
+
+        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
+        if mgmt_gw_daemons:
+            dd = mgmt_gw_daemons[0]
+            assert dd.hostname is not None
+            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
+            mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-host',
+                'dashboard set-prometheus-api-host',
+                f'{mgmt_gw_internal_endpoint}/internal/prometheus'
+            )
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-ssl-verify',
+                'dashboard set-prometheus-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-host',
+                'dashboard set-prometheus-api-host',
+                service_url
+            )
 
     def pre_remove(self, daemon: DaemonDescription) -> None:
         """
index e6c7bce15245148047d9788d8de7e893ad2fc193..4d3d11e20831bc80f45d5438131f1f7b245c8a7b 100644 (file)
   cert_key = /etc/grafana/certs/cert_key
   http_port = {{ http_port }}
   http_addr = {{ http_addr }}
+{% if use_url_prefix %}
+  root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/
+  serve_from_sub_path = true
+{% endif %}
 [snapshots]
   external_enabled = false
 [security]
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2
new file mode 100644 (file)
index 0000000..2220e8e
--- /dev/null
@@ -0,0 +1,75 @@
+
+server {
+{% if spec.disable_https %}
+    listen {{ spec.port or 80 }};
+{% else %}
+    listen                    {{ spec.port or 443 }} ssl;
+    listen                    [::]:{{ spec.port or 443 }} ssl;
+    ssl_certificate            /etc/nginx/ssl/nginx.crt;
+    ssl_certificate_key /etc/nginx/ssl/nginx.key;
+    {% if spec.ssl_protocols %}
+    ssl_protocols            {{ spec.ssl_protocols | join(' ') }};
+    {% else %}
+    ssl_protocols            TLSv1.3;
+    {% endif %}
+    {% if spec.ssl_ciphers %}
+    ssl_ciphers            {{ spec.ssl_ciphers | join(':') }};
+    {% else %}
+    # from:  https://ssl-config.mozilla.org/#server=nginx
+    ssl_ciphers              ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305;
+    {% endif %}
+
+    # Only return Nginx in server header, no extra info will be provided
+    server_tokens             {{ spec.server_tokens or 'off'}};
+
+    # Perfect Forward Secrecy(PFS) is frequently compromised without this
+    ssl_prefer_server_ciphers {{ spec.ssl_prefer_server_ciphers or 'on'}};
+
+    # Enable SSL session caching for improved performance
+    ssl_session_tickets       {{ spec.ssl_session_tickets or 'off'}};
+    ssl_session_timeout       {{ spec.ssl_session_timeout or '1d'}};
+    ssl_session_cache         {{ spec.ssl_session_cache or 'shared:SSL:10m'}};
+
+    # OCSP stapling
+    ssl_stapling              {{ spec.ssl_stapling or 'on'}};
+    ssl_stapling_verify       {{ spec.ssl_stapling_verify or 'on'}};
+    resolver_timeout 5s;
+
+    # Security headers
+    ## X-Content-Type-Options: avoid MIME type sniffing
+    add_header X-Content-Type-Options nosniff;
+    ## Strict Transport Security (HSTS): Yes
+    add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload";
+    ## Enables the Cross-site scripting (XSS) filter in browsers.
+    add_header X-XSS-Protection "1; mode=block";
+    ## Content-Security-Policy (CSP): FIXME
+    # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';";
+
+{% endif %}
+
+{% if dashboard_endpoints %}
+    location / {
+        proxy_pass {{ dashboard_scheme }}://dashboard_servers;
+        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+    }
+{% endif %}
+
+{% if grafana_endpoints %}
+    location /grafana {
+        rewrite ^/grafana/(.*) /$1 break;
+        proxy_pass {{ grafana_scheme }}://grafana_servers;
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    location /prometheus {
+        proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    location /alertmanager {
+        proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+    }
+{% endif %}
+}
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2
new file mode 100644 (file)
index 0000000..6848c04
--- /dev/null
@@ -0,0 +1,31 @@
+
+server {
+    listen              {{ internal_port }} ssl;
+    listen              [::]:{{ internal_port }} ssl;
+    ssl_certificate     /etc/nginx/ssl/nginx_internal.crt;
+    ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
+    ssl_prefer_server_ciphers on;
+
+{% if grafana_endpoints %}
+    location /internal/grafana {
+        rewrite ^/internal/grafana/(.*) /$1 break;
+        proxy_pass {{ grafana_scheme }}://grafana_servers;
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    location /internal/prometheus {
+        rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
+        proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    location /internal/alertmanager {
+        rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
+        proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+    }
+{% endif %}
+}
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
new file mode 100644 (file)
index 0000000..9ce6eb9
--- /dev/null
@@ -0,0 +1,44 @@
+
+# {{ cephadm_managed }}
+worker_rlimit_nofile 8192;
+
+events {
+    worker_connections 4096;
+}
+
+http {
+{% if dashboard_endpoints %}
+    upstream dashboard_servers {
+     {% for ep in dashboard_endpoints %}
+     server {{ ep }};
+     {% endfor %}
+    }
+{% endif %}
+
+{% if grafana_endpoints %}
+    upstream grafana_servers {
+     {% for ep in grafana_endpoints %}
+     server {{ ep }};
+     {% endfor %}
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    upstream prometheus_servers {
+     {% for ep in prometheus_endpoints %}
+     server {{ ep }};
+     {% endfor %}
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    upstream alertmanager_servers {
+     {% for ep in alertmanager_endpoints %}
+     server {{ ep }};
+     {% endfor %}
+    }
+{% endif %}
+
+    include /etc/nginx_external_server.conf;
+    include /etc/nginx_internal_server.conf;
+}
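
The templates can be sanity-checked outside the mgr with plain Jinja2 (a sketch; in the cluster they are rendered through self.mgr.template, which also supplies the cephadm_managed header):

from jinja2 import Environment, FileSystemLoader

# assumed to run from src/pybind/mgr/cephadm/templates
env = Environment(loader=FileSystemLoader('services/mgmt-gateway'))
context = {
    'cephadm_managed': 'This file is generated by cephadm.',
    'dashboard_endpoints': ['ceph-node-00:8443'],
    'grafana_endpoints': ['ceph-node-01:3000'],
    'prometheus_endpoints': ['ceph-node-01:9095'],
    'alertmanager_endpoints': ['ceph-node-01:9093'],
}
print(env.get_template('nginx.conf.j2').render(context))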
index d9b1f5f8e784c3f9e72f04347217f0c061d3cb64..996c1d5248cdfaf9c1964a9a2456b977c371996d 100644 (file)
@@ -1725,6 +1725,7 @@ class TestCephadm(object):
             'ingress_ssl_cert': False,
             'agent_endpoint_root_cert': False,
             'service_discovery_root_cert': False,
+            'mgmt_gw_root_cert': False,
             'grafana_cert': False,
             'alertmanager_cert': False,
             'prometheus_cert': False,
@@ -1774,6 +1775,7 @@ class TestCephadm(object):
             'service_discovery_key': False,
             'grafana_key': False,
             'alertmanager_key': False,
+            'mgmt_gw_root_key': False,
             'prometheus_key': False,
             'node_exporter_key': False,
             'iscsi_ssl_key': False,
index 77d127f81e5298a5b6775333b34aedef2f4263a7..63b4068f15e92ba78c1e1bdefc6f31b8735f7ef9 100644 (file)
@@ -35,6 +35,7 @@ from ceph.deployment.service_spec import (
     SNMPGatewaySpec,
     ServiceSpec,
     TracingSpec,
+    MgmtGatewaySpec,
 )
 from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect
 
@@ -45,9 +46,9 @@ from orchestrator._interface import DaemonDescription
 
 from typing import Dict, List
 
-grafana_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n"""
+ceph_generated_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n"""
 
-grafana_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n"""
+ceph_generated_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n"""
 
 
 class FakeInventory:
@@ -91,17 +92,17 @@ class FakeMgr:
 
 
 class TestCephadmService:
-    def test_set_service_url_on_dashboard(self):
+    def test_set_value_on_dashboard(self):
         # pylint: disable=protected-access
         mgr = FakeMgr()
         service_url = 'http://svc:1000'
         service = GrafanaService(mgr)
-        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+        service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
         assert mgr.config == service_url
 
         # set-cmd should not be called if value doesn't change
         mgr.check_mon_command.reset_mock()
-        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+        service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
         mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})
 
     def _get_services(self, mgr):
@@ -591,6 +592,7 @@ class TestMonitoring:
                                 "alertmanager.yml": y,
                             },
                             "peers": [],
+                            "use_url_prefix": False,
                         }
                     }),
                     use_current_daemon_image=False,
@@ -687,6 +689,7 @@ class TestMonitoring:
                             },
                             'peers': [],
                             'web_config': '/etc/alertmanager/web.yml',
+                            "use_url_prefix": False,
                         }
                     }),
                     use_current_daemon_image=False,
@@ -828,6 +831,7 @@ class TestMonitoring:
                             'retention_time': '15d',
                             'retention_size': '0',
                             'ip_to_bind_to': '1.2.3.1',
+                            "use_url_prefix": False
                         },
                     }),
                     use_current_daemon_image=False,
@@ -1011,6 +1015,7 @@ class TestMonitoring:
                             'retention_size': '0',
                             'ip_to_bind_to': '',
                             'web_config': '/etc/prometheus/web.yml',
+                            "use_url_prefix": False
                         },
                     }),
                     use_current_daemon_image=False,
@@ -1158,8 +1163,8 @@ class TestMonitoring:
         _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
 
         with with_host(cephadm_module, "test"):
-            cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert, host='test')
-            cephadm_module.cert_key_store.save_key('grafana_key', grafana_key, host='test')
+            cephadm_module.cert_key_store.save_cert('grafana_cert', ceph_generated_cert, host='test')
+            cephadm_module.cert_key_store.save_key('grafana_key', ceph_generated_key, host='test')
             with with_service(
                 cephadm_module, PrometheusSpec("prometheus")
             ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service(
@@ -1214,9 +1219,9 @@ class TestMonitoring:
                             isDefault: false
                             editable: false""").lstrip(),
                     'certs/cert_file': dedent(f"""
-                        # generated by cephadm\n{grafana_cert}""").lstrip(),
+                        # generated by cephadm\n{ceph_generated_cert}""").lstrip(),
                     'certs/cert_key': dedent(f"""
-                        # generated by cephadm\n{grafana_key}""").lstrip(),
+                        # generated by cephadm\n{ceph_generated_key}""").lstrip(),
                     'provisioning/dashboards/default.yml': dedent("""
                         # This file is generated by cephadm.
                         apiVersion: 1
@@ -1974,7 +1979,6 @@ class TestIngressService:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
-
         with with_host(cephadm_module, 'test'):
             cephadm_module.cache.update_host_networks('test', {
                 '1.2.3.0/24': {
@@ -2103,7 +2107,6 @@ class TestIngressService:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
-
         with with_host(cephadm_module, 'test', addr='1.2.3.7'):
             cephadm_module.cache.update_host_networks('test', {
                 '1.2.3.0/24': {
@@ -2231,7 +2234,6 @@ class TestIngressService:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
-
         with with_host(cephadm_module, 'test', addr='1.2.3.1'):
             with with_host(cephadm_module, 'test2', addr='1.2.3.2'):
                 cephadm_module.cache.update_host_networks('test', {
@@ -3193,3 +3195,178 @@ class TestSMB:
                     stdin=json.dumps(expected),
                     use_current_daemon_image=False,
                 )
+
+
+class TestMgmtGateway:
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_service_endpoints")
+    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    @patch('cephadm.ssl_cert_utils.SSLCerts.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key))
+    @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https"))
+    def test_mgmt_gateway_config(self, get_service_endpoints_mock, _run_cephadm, cephadm_module: CephadmOrchestrator):
+
+        def get_services_endpoints(name):
+            if name == 'prometheus':
+                return ["192.168.100.100:9095", "192.168.100.101:9095"]
+            elif name == 'grafana':
+                return ["ceph-node-2:3000", "ceph-node-2:3000"]
+            elif name == 'alertmanager':
+                return ["192.168.100.100:9093", "192.168.100.102:9093"]
+            return []
+
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        get_service_endpoints_mock.side_effect = get_services_endpoints
+
+        server_port = 5555
+        spec = MgmtGatewaySpec(port=server_port,
+                               ssl_certificate=ceph_generated_cert,
+                               ssl_certificate_key=ceph_generated_key)
+
+        expected = {
+            "fsid": "fsid",
+            "name": "mgmt-gateway.ceph-node",
+            "image": "",
+            "deploy_arguments": [],
+            "params": {"tcp_ports": [server_port]},
+            "meta": {
+                "service_name": "mgmt-gateway",
+                "ports": [server_port],
+                "ip": None,
+                "deployed_by": [],
+                "rank": None,
+                "rank_generation": None,
+                "extra_container_args": None,
+                "extra_entrypoint_args": None
+            },
+            "config_blobs": {
+                "files": {
+                    "nginx.conf": dedent("""
+                                         # This file is generated by cephadm.
+                                         worker_rlimit_nofile 8192;
+
+                                         events {
+                                             worker_connections 4096;
+                                         }
+
+                                         http {
+                                             upstream dashboard_servers {
+                                              server ceph-node-2:8443;
+                                              server ceph-node-2:8443;
+                                             }
+
+                                             upstream grafana_servers {
+                                              server ceph-node-2:3000;
+                                              server ceph-node-2:3000;
+                                             }
+
+                                             upstream prometheus_servers {
+                                              server 192.168.100.100:9095;
+                                              server 192.168.100.101:9095;
+                                             }
+
+                                             upstream alertmanager_servers {
+                                              server 192.168.100.100:9093;
+                                              server 192.168.100.102:9093;
+                                             }
+
+                                             include /etc/nginx_external_server.conf;
+                                             include /etc/nginx_internal_server.conf;
+                                         }"""),
+                    "nginx_external_server.conf": dedent("""
+                                             server {
+                                                 listen                    5555 ssl;
+                                                 listen                    [::]:5555 ssl;
+                                                 ssl_certificate            /etc/nginx/ssl/nginx.crt;
+                                                 ssl_certificate_key /etc/nginx/ssl/nginx.key;
+                                                 ssl_protocols            TLSv1.3;
+                                                 # from:  https://ssl-config.mozilla.org/#server=nginx
+                                                 ssl_ciphers              ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305;
+
+                                                 # Only return Nginx in server header, no extra info will be provided
+                                                 server_tokens             off;
+
+                                                 # Perfect Forward Secrecy(PFS) is frequently compromised without this
+                                                 ssl_prefer_server_ciphers on;
+
+                                                 # Enable SSL session caching for improved performance
+                                                 ssl_session_tickets       off;
+                                                 ssl_session_timeout       1d;
+                                                 ssl_session_cache         shared:SSL:10m;
+
+                                                 # OCSP stapling
+                                                 ssl_stapling              on;
+                                                 ssl_stapling_verify       on;
+                                                 resolver_timeout 5s;
+
+                                                 # Security headers
+                                                 ## X-Content-Type-Options: avoid MIME type sniffing
+                                                 add_header X-Content-Type-Options nosniff;
+                                                 ## Strict Transport Security (HSTS): Yes
+                                                 add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload";
+                                                 ## Enables the Cross-site scripting (XSS) filter in browsers.
+                                                 add_header X-XSS-Protection "1; mode=block";
+                                                 ## Content-Security-Policy (CSP): FIXME
+                                                 # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';";
+
+
+                                                 location / {
+                                                     proxy_pass https://dashboard_servers;
+                                                     proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                 }
+
+                                                 location /grafana {
+                                                     rewrite ^/grafana/(.*) /$1 break;
+                                                     proxy_pass https://grafana_servers;
+                                                 }
+
+                                                 location /prometheus {
+                                                     proxy_pass http://prometheus_servers;
+                                                 }
+
+                                                 location /alertmanager {
+                                                     proxy_pass http://alertmanager_servers;
+                                                 }
+                                             }"""),
+                    "nginx_internal_server.conf": dedent("""
+                                             server {
+                                                 listen              29443 ssl;
+                                                 listen              [::]:29443 ssl;
+                                                 ssl_certificate     /etc/nginx/ssl/nginx_internal.crt;
+                                                 ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                                                 ssl_protocols       TLSv1.2 TLSv1.3;
+                                                 ssl_ciphers         AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
+                                                 ssl_prefer_server_ciphers on;
+
+                                                 location /internal/grafana {
+                                                     rewrite ^/internal/grafana/(.*) /$1 break;
+                                                     proxy_pass https://grafana_servers;
+                                                 }
+
+                                                 location /internal/prometheus {
+                                                     rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
+                                                     proxy_pass http://prometheus_servers;
+                                                 }
+
+                                                 location /internal/alertmanager {
+                                                     rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
+                                                     proxy_pass http://alertmanager_servers;
+                                                 }
+                                             }"""),
+                    "nginx_internal.crt": f"{ceph_generated_cert}",
+                    "nginx_internal.key": f"{ceph_generated_key}",
+                    "nginx.crt": f"{ceph_generated_cert}",
+                    "nginx.key": f"{ceph_generated_key}",
+                }
+            }
+        }
+
+        with with_host(cephadm_module, 'ceph-node'):
+            with with_service(cephadm_module, spec):
+                _run_cephadm.assert_called_with(
+                    'ceph-node',
+                    'mgmt-gateway.ceph-node',
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps(expected),
+                    use_current_daemon_image=False,
+                )
index 8a04e31170aeb7888002e7a8319f4c97c06317db..0a320e3455683a63f549d470495a5658bd0497f5 100644 (file)
@@ -42,6 +42,7 @@ from ceph.deployment.service_spec import (
     RGWSpec,
     SMBSpec,
     SNMPGatewaySpec,
+    MgmtGatewaySpec,
     ServiceSpec,
     TunedProfileSpec,
 )
@@ -590,6 +591,7 @@ class Orchestrator(object):
             'snmp-gateway': self.apply_snmp_gateway,
             'host': self.add_host,
             'smb': self.apply_smb,
+            'mgmt-gateway': self.apply_mgmt_gateway,
         }
 
         def merge(l: OrchResult[List[str]], r: OrchResult[str]) -> OrchResult[List[str]]:  # noqa: E741
@@ -831,6 +833,10 @@ class Orchestrator(object):
         """Update an existing snmp gateway service"""
         raise NotImplementedError()
 
+    def apply_mgmt_gateway(self, spec: MgmtGatewaySpec) -> OrchResult[str]:
+        """Update an existing cluster gateway service"""
+        raise NotImplementedError()
+
     def apply_smb(self, spec: SMBSpec) -> OrchResult[str]:
         """Update a smb gateway service"""
         raise NotImplementedError()
@@ -914,6 +920,7 @@ def daemon_type_to_service(dtype: str) -> str:
         'keepalived': 'ingress',
         'iscsi': 'iscsi',
         'nvmeof': 'nvmeof',
+        'mgmt-gateway': 'mgmt-gateway',
         'rbd-mirror': 'rbd-mirror',
         'cephfs-mirror': 'cephfs-mirror',
         'nfs': 'nfs',
@@ -949,6 +956,7 @@ def service_to_daemon_types(stype: str) -> List[str]:
         'ingress': ['haproxy', 'keepalived'],
         'iscsi': ['iscsi'],
         'nvmeof': ['nvmeof'],
+        'mgmt-gateway': ['mgmt-gateway'],
         'rbd-mirror': ['rbd-mirror'],
         'cephfs-mirror': ['cephfs-mirror'],
         'nfs': ['nfs'],
index 5b5d7154da1de68d161322bbab40b01206a4ec67..3b943d41ca9efbf12270a65bceb79f777bfdd15a 100644 (file)
@@ -46,6 +46,7 @@ from ._interface import (
     RGWSpec,
     SMBSpec,
     SNMPGatewaySpec,
+    MgmtGatewaySpec,
     ServiceDescription,
     TunedProfileSpec,
     _cli_read_command,
@@ -1777,6 +1778,32 @@ Usage:
 
         return self._apply_misc([spec], dry_run, format, no_overwrite)
 
+    @_cli_write_command('orch apply mgmt-gateway')
+    def _apply_mgmt_gateway(self,
+                            port: Optional[int] = None,
+                            disable_https: Optional[bool] = False,
+                            placement: Optional[str] = None,
+                            unmanaged: bool = False,
+                            dry_run: bool = False,
+                            format: Format = Format.plain,
+                            no_overwrite: bool = False,
+                            inbuf: Optional[str] = None) -> HandleCommandResult:
+        """Add a cluster gateway service (cephadm only)"""
+        if inbuf:
+            raise OrchestratorValidationError('unrecognized command -i; -h or --help for usage')
+
+        spec = MgmtGatewaySpec(
+            placement=PlacementSpec.from_string(placement),
+            unmanaged=unmanaged,
+            port=port,
+            disable_https=disable_https,
+            preview_only=dry_run
+        )
+
+        spec.validate()  # force any validation exceptions to be caught correctly
+
+        return self._apply_misc([spec], dry_run, format, no_overwrite)
+
     @_cli_write_command('orch apply nvmeof')
     def _apply_nvmeof(self,
                       pool: str,
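
Going by the flags defined on _apply_mgmt_gateway above, an illustrative invocation of the new command (the host label and port below are placeholders, not taken from the patch) would look roughly like: ceph orch apply mgmt-gateway --port 9443 --placement "label:mgmt". Because the handler rejects inbuf, a YAML spec for this service would instead be applied through the generic "ceph orch apply -i <spec>" path, which the service-type mapping added in _interface.py routes to apply_mgmt_gateway.
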
index b91b62b02ace6867d8459bb2c681aa93e916074e..5a294898fc51987a5ebbe4bc7db1827eb6f6210e 100644 (file)
@@ -765,6 +765,7 @@ class ServiceSpec(object):
         'elasticsearch',
         'grafana',
         'ingress',
+        'mgmt-gateway',
         'iscsi',
         'jaeger-agent',
         'jaeger-collector',
@@ -819,6 +820,7 @@ class ServiceSpec(object):
             'nvmeof': NvmeofServiceSpec,
             'alertmanager': AlertManagerSpec,
             'ingress': IngressSpec,
+            'mgmt-gateway': MgmtGatewaySpec,
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
@@ -1755,6 +1757,135 @@ class IngressSpec(ServiceSpec):
 yaml.add_representer(IngressSpec, ServiceSpec.yaml_representer)
 
 
+class MgmtGatewaySpec(ServiceSpec):
+    def __init__(self,
+                 service_type: str = 'mgmt-gateway',
+                 service_id: Optional[str] = None,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 disable_https: Optional[bool] = False,
+                 port: Optional[int] = None,
+                 ssl_certificate: Optional[str] = None,
+                 ssl_certificate_key: Optional[str] = None,
+                 ssl_prefer_server_ciphers: Optional[str] = None,
+                 ssl_session_tickets: Optional[str] = None,
+                 ssl_session_timeout: Optional[str] = None,
+                 ssl_session_cache: Optional[str] = None,
+                 server_tokens: Optional[str] = None,
+                 ssl_stapling: Optional[str] = None,
+                 ssl_stapling_verify: Optional[str] = None,
+                 ssl_protocols: Optional[List[str]] = None,
+                 ssl_ciphers: Optional[List[str]] = None,
+                 preview_only: bool = False,
+                 unmanaged: bool = False,
+                 extra_container_args: Optional[GeneralArgList] = None,
+                 extra_entrypoint_args: Optional[GeneralArgList] = None,
+                 custom_configs: Optional[List[CustomConfig]] = None,
+                 ):
+        assert service_type == 'mgmt-gateway'
+
+        super(MgmtGatewaySpec, self).__init__(
+            'mgmt-gateway', service_id=service_id,
+            placement=placement, config=config,
+            networks=networks,
+            preview_only=preview_only,
+            extra_container_args=extra_container_args,
+            extra_entrypoint_args=extra_entrypoint_args,
+            custom_configs=custom_configs
+        )
+        #: Flag to disable HTTPS. If True, the server will use insecure HTTP
+        self.disable_https = disable_https
+        #: The port number on which the server will listen
+        self.port = port
+        #: A multi-line string that contains the SSL certificate
+        self.ssl_certificate = ssl_certificate
+        #: A multi-line string that contains the SSL key
+        self.ssl_certificate_key = ssl_certificate_key
+        #: Prefer server ciphers over client ciphers: on | off
+        self.ssl_prefer_server_ciphers = ssl_prefer_server_ciphers
+        #: Flag to enable or disable SSL session tickets: on | off
+        self.ssl_session_tickets = ssl_session_tickets
+        #: The duration for SSL session timeout. Syntax: time (e.g.: 5m)
+        self.ssl_session_timeout = ssl_session_timeout
+        #: SSL session cache type and size: off | none | [builtin[:size]] [shared:name:size]
+        self.ssl_session_cache = ssl_session_cache
+        #: Flag to control server tokens in responses: on | off | build | string
+        self.server_tokens = server_tokens
+        #: Flag to enable or disable SSL stapling: on | off
+        self.ssl_stapling = ssl_stapling
+        #: Flag to control verification of SSL stapling: on | off
+        self.ssl_stapling_verify = ssl_stapling_verify
+        #: A list of supported SSL protocols (as supported by nginx)
+        self.ssl_protocols = ssl_protocols
+        #: List of supported secure SSL ciphers. Changing this list may reduce system security.
+        self.ssl_ciphers = ssl_ciphers
+
+    def get_port_start(self) -> List[int]:
+        ports = []
+        if self.port is not None:
+            ports.append(cast(int, self.port))
+        return ports
+
+    def validate(self) -> None:
+        super(MgmtGatewaySpec, self).validate()
+        self._validate_port(self.port)
+        self._validate_certificate(self.ssl_certificate, "ssl_certificate")
+        self._validate_private_key(self.ssl_certificate_key, "ssl_certificate_key")
+        self._validate_boolean_switch(self.ssl_prefer_server_ciphers, "ssl_prefer_server_ciphers")
+        self._validate_boolean_switch(self.ssl_session_tickets, "ssl_session_tickets")
+        self._validate_session_timeout(self.ssl_session_timeout)
+        self._validate_session_cache(self.ssl_session_cache)
+        self._validate_server_tokens(self.server_tokens)
+        self._validate_boolean_switch(self.ssl_stapling, "ssl_stapling")
+        self._validate_boolean_switch(self.ssl_stapling_verify, "ssl_stapling_verify")
+        self._validate_ssl_protocols(self.ssl_protocols)
+
+    def _validate_port(self, port: Optional[int]) -> None:
+        if port is not None and not (1 <= port <= 65535):
+            raise SpecValidationError(f"Invalid port: {port}. Must be between 1 and 65535.")
+
+    def _validate_certificate(self, cert: Optional[str], name: str) -> None:
+        if cert is not None and not isinstance(cert, str):
+            raise SpecValidationError(f"Invalid {name}. Must be a string.")
+
+    def _validate_private_key(self, key: Optional[str], name: str) -> None:
+        if key is not None and not isinstance(key, str):
+            raise SpecValidationError(f"Invalid {name}. Must be a string.")
+
+    def _validate_boolean_switch(self, value: Optional[str], name: str) -> None:
+        if value is not None and value not in ['on', 'off']:
+            raise SpecValidationError(f"Invalid {name}: {value}. Supported values: on | off.")
+
+    def _validate_session_timeout(self, timeout: Optional[str]) -> None:
+        if timeout is not None and not re.match(r'^\d+[smhd]$', timeout):
+            raise SpecValidationError(f"Invalid SSL Session Timeout: {timeout}. \
+            Value must be a number followed by 's', 'm', 'h', or 'd'.")
+
+    def _validate_session_cache(self, cache: Optional[str]) -> None:
+        valid_caches = ['none', 'off', 'builtin', 'shared']
+        if cache is not None and not any(cache.startswith(vc) for vc in valid_caches):
+            raise SpecValidationError(f"Invalid SSL Session Cache: {cache}. Supported values are: \
+            off | none | [builtin[:size]] [shared:name:size]")
+
+    def _validate_server_tokens(self, tokens: Optional[str]) -> None:
+        if tokens is not None and tokens not in ['on', 'off', 'build', 'string']:
+            raise SpecValidationError(f"Invalid Server Tokens: {tokens}. Must be one of \
+            ['on', 'off', 'build', 'string'].")
+
+    def _validate_ssl_protocols(self, protocols: Optional[List[str]]) -> None:
+        if protocols is None:
+            return
+        valid_protocols = ['TLSv1.2', 'TLSv1.3']
+        for protocol in protocols:
+            if protocol not in valid_protocols:
+                raise SpecValidationError(f"Invalid SSL Protocol: {protocol}. \
+                Must be one of {valid_protocols}.")
+
+
+yaml.add_representer(MgmtGatewaySpec, ServiceSpec.yaml_representer)
+
+
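
As a rough, illustrative sketch (not taken from the patch; the placement, port and other values below are placeholders), the new spec class could be exercised directly along these lines, mirroring roughly what the mgmt-gateway CLI handler builds from its flags:

from ceph.deployment.service_spec import MgmtGatewaySpec, PlacementSpec

spec = MgmtGatewaySpec(
    placement=PlacementSpec(count=1),   # where to schedule the gateway daemon
    port=9443,                          # external HTTPS listening port
    ssl_protocols=['TLSv1.3'],          # only TLSv1.2 / TLSv1.3 pass validation
    ssl_session_timeout='1d',           # must match <number>[smhd]
    server_tokens='off',                # one of: on | off | build | string
)

# validate() raises SpecValidationError for out-of-range ports (e.g. 70000),
# unknown SSL protocols, or a malformed session timeout.
spec.validate()

When a YAML spec with service_type: mgmt-gateway is loaded, the service-type registry updated above resolves it to this class.
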
 class InitContainerSpec(object):
     """An init container is not a service that lives on its own, but rather
     is used to run and exit prior to a service container starting in order