From: Adam King Date: Mon, 22 Aug 2022 15:14:12 +0000 (-0400) Subject: mgr/cephadm: allow setting prometheus retention time X-Git-Tag: v17.2.6~495^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=30eb87b7e4e632bc84f49549b34d18c6bb19b8f7;p=ceph.git mgr/cephadm: allow setting prometheus retention time When we deploy Prometheus server, we don't provide any ability to define the tsdb retention time - so it defaults to 15d. This change adds a field that can be passed in a prometheus service spec that will be passed as an arg to the --storage.tsdb.retention.time parameter for the prometheus daemon. Fixes: https://tracker.ceph.com/issues/54308 Signed-off-by: Adam King (cherry picked from commit 91dd03fd648d25773a83fdad311b62b781619fc4) Conflicts: src/pybind/mgr/cephadm/services/monitoring.py src/pybind/mgr/cephadm/tests/test_services.py --- diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 010370d2eff8..6bd8deabca4f 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -2594,6 +2594,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id): port = meta['ports'][0] r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'prometheus': + config = get_parm(ctx.config_json) + retention_time = config.get('retention_time', '15d') + r += [f'--storage.tsdb.retention.time={retention_time}'] scheme = 'http' host = get_fqdn() r += [f'--web.external-url={scheme}://{host}:{port}'] diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 7975e1f58ae8..b5c6008e88d7 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -1160,6 +1160,7 @@ class TestMonitoring(object): def test_prometheus_external_url(self): ctx = cd.CephadmContext() + ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'}) daemon_type = 'prometheus' daemon_id = 'home' fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704' diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index f99c79e79557..58b0fca2d78f 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -9,7 +9,8 @@ from urllib.parse import urlparse from mgr_module import HandleCommandResult from orchestrator import DaemonDescription -from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec +from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ + SNMPGatewaySpec, PrometheusSpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec from cephadm.services.ingress import IngressSpec from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url @@ -289,6 +290,14 @@ class PrometheusService(CephadmService): daemon_spec: CephadmDaemonDeploySpec, ) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type + + prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + try: + retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d' + except AttributeError: + retention_time = '15d' + deps = [] # type: List[str] # scrape mgrs @@ -363,12 +372,13 @@ class PrometheusService(CephadmService): 'haproxy_targets': haproxy_targets, 'nodes': nodes, } - r = { + r: Dict[str, Any] = { 'files': { 'prometheus.yml': self.mgr.template.render( 'services/prometheus/prometheus.yml.j2', context) - } + }, + 'retention_time': retention_time } # include alerts, if present in the container diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 98dcc850f205..175cb6b4df66 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -427,7 +427,7 @@ class TestMonitoring: '--tcp-ports', '9095' ], stdin=json.dumps({"files": {"prometheus.yml": y, - "/etc/prometheus/alerting/custom_alerts.yml": ""}}), + "/etc/prometheus/alerting/custom_alerts.yml": ""}, 'retention_time': '15d'}), image='') @patch("cephadm.serve.CephadmServe._run_cephadm") diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 72f3efa2e913..3f8bd315a394 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -519,7 +519,7 @@ class ServiceSpec(object): 'container': CustomContainerSpec, 'grafana': GrafanaSpec, 'node-exporter': MonitoringSpec, - 'prometheus': MonitoringSpec, + 'prometheus': PrometheusSpec, 'loki': MonitoringSpec, 'promtail': MonitoringSpec, 'snmp-gateway': SNMPGatewaySpec, @@ -1255,6 +1255,33 @@ class GrafanaSpec(MonitoringSpec): yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer) +class PrometheusSpec(MonitoringSpec): + def __init__(self, + service_type: str = 'prometheus', + service_id: Optional[str] = None, + placement: Optional[PlacementSpec] = None, + unmanaged: bool = False, + preview_only: bool = False, + config: Optional[Dict[str, str]] = None, + networks: Optional[List[str]] = None, + port: Optional[int] = None, + retention_time: Optional[str] = None, + extra_container_args: Optional[List[str]] = None, + custom_configs: Optional[List[CustomConfig]] = None, + ): + assert service_type == 'prometheus' + super(PrometheusSpec, self).__init__( + 'prometheus', service_id=service_id, + placement=placement, unmanaged=unmanaged, + preview_only=preview_only, config=config, networks=networks, port=port, + extra_container_args=extra_container_args, custom_configs=custom_configs) + + self.retention_time = retention_time + + +yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer) + + class SNMPGatewaySpec(ServiceSpec): class SNMPVersion(str, enum.Enum): V2c = 'V2c'