From: Adam King
Date: Mon, 22 Aug 2022 15:14:12 +0000 (-0400)
Subject: mgr/cephadm: allow setting prometheus retention time
X-Git-Tag: v16.2.11~312^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=494ee54d8691b0f80120afefcdf6751e907d13c2;p=ceph.git

mgr/cephadm: allow setting prometheus retention time

When we deploy Prometheus server, we don't provide any ability to define the tsdb retention time - so it defaults to 15d.
This change adds a field that can be passed in a prometheus service spec that will be passed as an arg to the --storage.tsdb.retention.time parameter for the prometheus daemon.

Fixes: https://tracker.ceph.com/issues/54308
Signed-off-by: Adam King
(cherry picked from commit 91dd03fd648d25773a83fdad311b62b781619fc4)

Conflicts:
	src/pybind/mgr/cephadm/services/monitoring.py
	src/pybind/mgr/cephadm/tests/test_services.py
	src/python-common/ceph/deployment/service_spec.py
---

diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm
index 46c3d7fce5e..c91f1dc736b 100755
--- a/src/cephadm/cephadm
+++ b/src/cephadm/cephadm
@@ -2376,6 +2376,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
                     port = meta['ports'][0]
             r += [f'--web.listen-address={ip}:{port}']
             if daemon_type == 'prometheus':
+                config = get_parm(ctx.config_json)
+                retention_time = config.get('retention_time', '15d')
+                r += [f'--storage.tsdb.retention.time={retention_time}']
                 scheme = 'http'
                 host = get_fqdn()
                 r += [f'--web.external-url={scheme}://{host}:{port}']
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
index d513dcb8982..c57963d4dc9 100644
--- a/src/cephadm/tests/test_cephadm.py
+++ b/src/cephadm/tests/test_cephadm.py
@@ -1095,6 +1095,7 @@ class TestMonitoring(object):
 
     def test_prometheus_external_url(self):
         ctx = cd.CephadmContext()
+        ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
         daemon_type = 'prometheus'
         daemon_id = 'home'
         fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 9c2a08b6d70..ef47d2cf932 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -9,7 +9,8 @@ from urllib.parse import urlparse
 from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+    SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
 from cephadm.services.ingress import IngressSpec
 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
@@ -267,6 +268,13 @@ class PrometheusService(CephadmService):
         assert self.TYPE == daemon_spec.daemon_type
         deps = []  # type: List[str]
 
+        prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+        try:
+            retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d'
+        except AttributeError:
+            retention_time = '15d'
+
         # scrape mgrs
         mgr_scrape_list = []
         mgr_map = self.mgr.get('mgr_map')
@@ -339,12 +347,13 @@ class PrometheusService(CephadmService):
             'haproxy_targets': haproxy_targets,
             'nodes': nodes,
         }
-        r = {
+        r: Dict[str, Any] = {
             'files': {
                 'prometheus.yml':
                     self.mgr.template.render(
                         'services/prometheus/prometheus.yml.j2', context)
-            }
+            },
+            'retention_time': retention_time
         }
 
         # include alerts, if present in the container
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index d1945d37227..5eac052d08a 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -425,7 +425,7 @@ class TestMonitoring:
                         '--config-json', '-',
                         '--tcp-ports', '9095'
                     ],
-                    stdin=json.dumps({"files": {"prometheus.yml": y}}),
+                    stdin=json.dumps({"files": {"prometheus.yml": y}, 'retention_time': '15d'}),
                     image='')
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index e105ea8ebdf..17130ea9a37 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -466,7 +466,7 @@ class ServiceSpec(object):
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
-            'prometheus': MonitoringSpec,
+            'prometheus': PrometheusSpec,
             'snmp-gateway': SNMPGatewaySpec,
         }.get(service_type, cls)
         if ret == ServiceSpec and not service_type:
@@ -1180,6 +1180,32 @@ class GrafanaSpec(MonitoringSpec):
 yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer)
 
 
+class PrometheusSpec(MonitoringSpec):
+    def __init__(self,
+                 service_type: str = 'prometheus',
+                 service_id: Optional[str] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
+                 retention_time: Optional[str] = None,
+                 extra_container_args: Optional[List[str]] = None,
+                 ):
+        assert service_type == 'prometheus'
+        super(PrometheusSpec, self).__init__(
+            'prometheus', service_id=service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config, networks=networks, port=port,
+            extra_container_args=extra_container_args)
+
+        self.retention_time = retention_time
+
+
+yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer)
+
+
 class SNMPGatewaySpec(ServiceSpec):
     class SNMPVersion(str, enum.Enum):
         V2c = 'V2c'