The ``reconfig`` command also takes care of setting the right URL for Ceph
Dashboard.
+Networks and Ports
+~~~~~~~~~~~~~~~~~~
+
+All monitoring services can have the network and port they bind to configured
+with a YAML service specification.
+
+Example spec file:
+
+.. code-block:: yaml
+
+    service_type: grafana
+    service_name: grafana
+    placement:
+      count: 1
+    networks:
+    - 192.169.142.0/24
+    spec:
+      port: 4200
+
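+If no port is specified, each service binds to its default port (prometheus:
+9095, node-exporter: 9100, grafana: 3000, alertmanager: 9093; port 9094 is
+reserved for the alertmanager cluster listen address).
+
+As a minimal sketch (mirroring the unit test in this change), such a spec is
+parsed into a ``MonitoringSpec`` whose ``get_port_start()`` lists the ports
+the daemon will bind to:
+
+.. code-block:: python
+
+    import yaml
+    from ceph.deployment.service_spec import ServiceSpec
+
+    # PyYAML accepts a uniformly indented top-level mapping, so the YAML
+    # can stay indented inside the string.
+    spec = ServiceSpec.from_json(yaml.safe_load("""
+    service_type: grafana
+    service_name: grafana
+    placement:
+      count: 1
+    spec:
+      port: 4200
+    """))
+    assert spec.get_port_start() == [4200]
+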
Using custom images
~~~~~~~~~~~~~~~~~~~
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
-                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
            ],
            'config-json-files': [
                'prometheus.yml',
            'cpus': '2',
            'memory': '2GB',
            'args': [
-                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
+        # set the ip and port to bind to for node-exporter, alertmanager and prometheus
+        if daemon_type != 'grafana':
+            ip = ''
+            port = Monitoring.port_map[daemon_type][0]
+            if 'meta_json' in ctx and ctx.meta_json:
+                meta = json.loads(ctx.meta_json) or {}
+                if 'ip' in meta and meta['ip']:
+                    ip = meta['ip']
+                if 'ports' in meta and meta['ports']:
+                    port = meta['ports'][0]
+            r += [f'--web.listen-address={ip}:{port}']
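+            # e.g. a meta-json of '{"ip": "10.1.2.3", "ports": [4200]}' (values
+            # illustrative) yields '--web.listen-address=10.1.2.3:4200'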
        if daemon_type == 'alertmanager':
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    firewall = Firewalld(ctx)
-
    firewall.enable_service_for(daemon_type)
-
-    fw_ports = []
-
-    if daemon_type in Monitoring.port_map.keys():
-        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc
-
-    firewall.open_ports(fw_ports)
    firewall.apply_rules()
    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
-        if not ctx.reconfig and not redeploy:
-            daemon_ports.extend(Monitoring.port_map[daemon_type])
-
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        prom_services = []  # type: List[str]
        for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
            assert dd.hostname is not None
-            prom_services.append(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else 9095
+            prom_services.append(addr + ':' + str(port))
            deps.append(dd.name())
        grafana_data_sources = self.mgr.template.render(
            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})
        grafana_ini = self.mgr.template.render(
-            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})
+            'services/grafana/grafana.ini.j2', {
+                'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
+                'http_addr': daemon_spec.ip if daemon_spec.ip else ''
+            })
        config_file = {
            'files': {
                # TODO: signed cert
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
-        service_url = 'https://{}:{}'.format(
-            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'https://{}:{}'.format(addr, port)
        self._set_service_url_on_dashboard(
            'Grafana',
            'dashboard get-grafana-api-url',
    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
-        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
-                                            self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'http://{}:{}'.format(addr, port)
        self._set_service_url_on_dashboard(
            'AlertManager',
            'dashboard get-alertmanager-api-host',
        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
            assert dd.hostname is not None
            deps.append(dd.name())
-            addr = self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = str(dd.ports[0]) if dd.ports else '9100'
            nodes.append({
                'hostname': dd.hostname,
-                'url': addr.split(':')[0] + ':9100'
+                'url': addr.split(':')[0] + ':' + port
            })
        # scrape alert managers
        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
            assert dd.hostname is not None
            deps.append(dd.name())
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = str(dd.ports[0]) if dd.ports else '9093'
+            alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], port))
        # scrape haproxies
        haproxy_targets = []
    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
        dd = self.get_active_daemon(daemon_descrs)
        assert dd.hostname is not None
-        service_url = 'http://{}:{}'.format(
-            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'http://{}:{}'.format(addr, port)
        self._set_service_url_on_dashboard(
            'Prometheus',
            'dashboard get-prometheus-api-host',
    type: 'prometheus'
    access: 'proxy'
    orgId: 1
-    url: 'http://{{ host }}:9095'
+    url: 'http://{{ host }}'
    basicAuth: false
    isDefault: {{ 'true' if loop.first else 'false' }}
    editable: false
  cert_file = /etc/grafana/certs/cert_file
  cert_key = /etc/grafana/certs/cert_key
  http_port = {{ http_port }}
+  http_addr = {{ http_addr }}
[security]
  admin_user = admin
  admin_password = admin
import pytest
+import yaml
+
from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
from cephadm.serve import CephadmServe
from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+ '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}',
image='')
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+    def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.return_value = ('{}', '', 0)
+
+        with with_host(cephadm_module, 'test'):
+
+            yaml_str = """service_type: alertmanager
+service_name: alertmanager
+placement:
+  count: 1
+spec:
+  port: 4200
+"""
+            yaml_file = yaml.safe_load(yaml_str)
+            spec = ServiceSpec.from_json(yaml_file)
+
+            with mock.patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
+                with with_service(cephadm_module, spec):
+
+                    CephadmServe(cephadm_module)._check_daemons()
+
+                    _run_cephadm.assert_called_with(
+                        'test', 'alertmanager.test', 'deploy', [
+                            '--name', 'alertmanager.test',
+                            '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}',
+                            '--config-json', '-',
+                            '--tcp-ports', '4200 9094',
+                            '--reconfig'
+                        ],
+                        stdin='{}',
+                        image='')
+
    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
        with with_host(cephadm_module, 'test'):
            'alertmanager': AlertManagerSpec,
            'ingress': IngressSpec,
            'container': CustomContainerSpec,
+            'grafana': MonitoringSpec,
+            'node-exporter': MonitoringSpec,
+            'prometheus': MonitoringSpec,
        }.get(service_type, cls)
        if ret == ServiceSpec and not service_type:
            raise SpecValidationError('Spec needs a "service_type" key.')
                 user_data: Optional[Dict[str, Any]] = None,
                 config: Optional[Dict[str, str]] = None,
                 networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
                 ):
        assert service_type == 'alertmanager'
        super(AlertManagerSpec, self).__init__(
        # added to the default receivers'
        # <webhook_configs> configuration.
        self.user_data = user_data or {}
+        self.port = port
+
+    def get_port_start(self) -> List[int]:
+        return [self.get_port(), 9094]
+
+    def get_port(self) -> int:
+        if self.port:
+            return self.port
+        else:
+            return 9093
+
+    def validate(self) -> None:
+        super(AlertManagerSpec, self).validate()
+
+        if self.port == 9094:
+            raise SpecValidationError(
+                'Port 9094 is reserved for AlertManager cluster listen address')
yaml.add_representer(AlertManagerSpec, ServiceSpec.yaml_representer)
yaml.add_representer(CustomContainerSpec, ServiceSpec.yaml_representer)
+
+
+class MonitoringSpec(ServiceSpec):
+    def __init__(self,
+                 service_type: str,
+                 service_id: Optional[str] = None,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 port: Optional[int] = None,
+                 ):
+        assert service_type in ['grafana', 'node-exporter', 'prometheus']
+
+        super(MonitoringSpec, self).__init__(
+            service_type, service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config,
+            networks=networks)
+
+        self.service_type = service_type
+        self.port = port
+
+    def get_port_start(self) -> List[int]:
+        return [self.get_port()]
+
+    def get_port(self) -> int:
+        if self.port:
+            return self.port
+        else:
+            return {'prometheus': 9095,
+                    'node-exporter': 9100,
+                    'grafana': 3000}[self.service_type]
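
Taken together, these spec classes determine which ports a monitoring daemon
binds to. A minimal sketch of the resulting behaviour (assuming
``AlertManagerSpec`` defaults ``service_type`` to ``'alertmanager'``, which
its constructor's assert suggests but this excerpt does not show):

.. code-block:: python

    from ceph.deployment.service_spec import AlertManagerSpec, MonitoringSpec

    # Defaults apply when no port is given in the spec.
    assert MonitoringSpec('grafana').get_port() == 3000
    assert MonitoringSpec('prometheus').get_port_start() == [9095]

    # An explicit port overrides the default; alertmanager always appends
    # its fixed cluster listen port 9094 (which is why a spec asking for
    # port 9094 fails validation).
    assert AlertManagerSpec(port=4200).get_port_start() == [4200, 9094]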