git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: allow setting prometheus retention time
author: Adam King <adking@redhat.com>
Mon, 22 Aug 2022 15:14:12 +0000 (11:14 -0400)
committer: Adam King <adking@redhat.com>
Thu, 1 Sep 2022 18:48:56 +0000 (14:48 -0400)
When we deploy the Prometheus server, we don't provide any
way to set the TSDB retention time, so it defaults to 15d.

This change adds a retention_time field to the prometheus service
spec; its value is passed to the prometheus daemon through the
--storage.tsdb.retention.time argument.

Fixes: https://tracker.ceph.com/issues/54308
Signed-off-by: Adam King <adking@redhat.com>
src/cephadm/cephadm
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index d92451165d2480d29d19bbd0b6010da7e72e3312..6e097f863a8654ae3322f2e8e179770b389e4913 100755 (executable)
@@ -2637,6 +2637,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
                     port = meta['ports'][0]
             r += [f'--web.listen-address={ip}:{port}']
             if daemon_type == 'prometheus':
+                config = get_parm(ctx.config_json)
+                retention_time = config.get('retention_time', '15d')
+                r += [f'--storage.tsdb.retention.time={retention_time}']
                 scheme = 'http'
                 host = get_fqdn()
                 r += [f'--web.external-url={scheme}://{host}:{port}']
index 937d86290414a64a853f132a8b3d92d3a3615398..2702c81eddb2734c8852edd61c2e6d3e0718de77 100644 (file)
@@ -1160,6 +1160,7 @@ class TestMonitoring(object):
 
     def test_prometheus_external_url(self):
         ctx = cd.CephadmContext()
+        ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
         daemon_type = 'prometheus'
         daemon_id = 'home'
         fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
index 107a4f74d06dc668e98c26219e024f63e304d735..f111e00bb2def358d6a60d1dc8bbea0743e98f05 100644 (file)
@@ -9,7 +9,8 @@ from urllib.parse import urlparse
 from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+    SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
 
@@ -290,6 +291,13 @@ class PrometheusService(CephadmService):
 
         assert self.TYPE == daemon_spec.daemon_type
 
+        spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+        try:
+            retention_time = spec.retention_time if spec.retention_time else '15d'
+        except AttributeError:
+            retention_time = '15d'
+
         t = self.mgr.get('mgr_map').get('services', {}).get('prometheus', None)
         sd_port = self.mgr.service_discovery_port
         srv_end_point = ''
@@ -319,11 +327,12 @@ class PrometheusService(CephadmService):
             'haproxy_sd_url': haproxy_sd_url,
         }
 
-        r = {
+        r: Dict[str, Any] = {
             'files': {
                 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
                 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
-            }
+            },
+            'retention_time': retention_time
         }
 
         # include alerts, if present in the container
index d6f16834e2415f8cac19b2661de91dd5453ae70b..22e58317af0d7b6d821c518ffaab67fa98526fde 100644 (file)
@@ -431,7 +431,7 @@ class TestMonitoring:
                         '--tcp-ports', '9095'
                     ],
                     stdin=json.dumps({"files": {"prometheus.yml": y, "root_cert.pem": '',
-                                                "/etc/prometheus/alerting/custom_alerts.yml": ""}}),
+                                                "/etc/prometheus/alerting/custom_alerts.yml": ""}, 'retention_time': '15d'}),
                     image='')
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
index 62543e692d67a655d0ec69f32e3547bf86ce98b3..9ff800f42eaf17d6db15786e752c21ef282ec59b 100644 (file)
@@ -520,7 +520,7 @@ class ServiceSpec(object):
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
-            'prometheus': MonitoringSpec,
+            'prometheus': PrometheusSpec,
             'loki': MonitoringSpec,
             'promtail': MonitoringSpec,
             'snmp-gateway': SNMPGatewaySpec,
@@ -1261,6 +1261,33 @@ class GrafanaSpec(MonitoringSpec):
 yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer)
 
 
+class PrometheusSpec(MonitoringSpec):  # MonitoringSpec subclass for the 'prometheus' service type; adds a retention_time attribute
+    def __init__(self,
+                 service_type: str = 'prometheus',  # must equal 'prometheus' (asserted below)
+                 service_id: Optional[str] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
+                 retention_time: Optional[str] = None,  # e.g. '15d'; None means "not set in the spec"
+                 extra_container_args: Optional[List[str]] = None,
+                 custom_configs: Optional[List[CustomConfig]] = None,
+                 ):
+        assert service_type == 'prometheus'
+        super(PrometheusSpec, self).__init__(  # every argument except retention_time is forwarded to MonitoringSpec
+            'prometheus', service_id=service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config, networks=networks, port=port,
+            extra_container_args=extra_container_args, custom_configs=custom_configs)
+
+        self.retention_time = retention_time  # stored as given; no validation or default is applied in this class
+
+
+yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer)
+
+
 class SNMPGatewaySpec(ServiceSpec):
     class SNMPVersion(str, enum.Enum):
         V2c = 'V2c'