git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: allow setting prometheus retention time
author: Adam King <adking@redhat.com>
Mon, 22 Aug 2022 15:14:12 +0000 (11:14 -0400)
committer: Adam King <adking@redhat.com>
Fri, 16 Sep 2022 14:48:28 +0000 (10:48 -0400)
When we deploy Prometheus server, we don't provide any
ability to define the tsdb retention time - so it defaults to 15d.

This change adds a field that can be passed in a prometheus service
spec that will be passed as an arg to the --storage.tsdb.retention.time
parameter for the prometheus daemon.

Fixes: https://tracker.ceph.com/issues/54308
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 91dd03fd648d25773a83fdad311b62b781619fc4)

Conflicts:
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py

src/cephadm/cephadm
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index 010370d2eff86c288c7ceda203da07a99d49d922..6bd8deabca4fea602b21da6a2d48c05d718a05cf 100755 (executable)
@@ -2594,6 +2594,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
                     port = meta['ports'][0]
             r += [f'--web.listen-address={ip}:{port}']
             if daemon_type == 'prometheus':
+                config = get_parm(ctx.config_json)
+                retention_time = config.get('retention_time', '15d')
+                r += [f'--storage.tsdb.retention.time={retention_time}']
                 scheme = 'http'
                 host = get_fqdn()
                 r += [f'--web.external-url={scheme}://{host}:{port}']
index 7975e1f58ae82856951951696df9fe505cdc8e23..b5c6008e88d74b7947b6ed5510f6a4e22e13fd03 100644 (file)
@@ -1160,6 +1160,7 @@ class TestMonitoring(object):
 
     def test_prometheus_external_url(self):
         ctx = cd.CephadmContext()
+        ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
         daemon_type = 'prometheus'
         daemon_id = 'home'
         fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
index f99c79e795575531149e31cc5f2a97c52f612003..58b0fca2d78f9a1c582494b28c3eab86dec7a715 100644 (file)
@@ -9,7 +9,8 @@ from urllib.parse import urlparse
 from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+    SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
 from cephadm.services.ingress import IngressSpec
 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
@@ -289,6 +290,14 @@ class PrometheusService(CephadmService):
             daemon_spec: CephadmDaemonDeploySpec,
     ) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
+
+        prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+        try:
+            retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d'
+        except AttributeError:
+            retention_time = '15d'
+
         deps = []  # type: List[str]
 
         # scrape mgrs
@@ -363,12 +372,13 @@ class PrometheusService(CephadmService):
             'haproxy_targets': haproxy_targets,
             'nodes': nodes,
         }
-        r = {
+        r: Dict[str, Any] = {
             'files': {
                 'prometheus.yml':
                     self.mgr.template.render(
                         'services/prometheus/prometheus.yml.j2', context)
-            }
+            },
+            'retention_time': retention_time
         }
 
         # include alerts, if present in the container
index 98dcc850f20505fb5b88723bf798b25d8c1a8235..175cb6b4df66c5ff427adec48a1456e46f0ad57b 100644 (file)
@@ -427,7 +427,7 @@ class TestMonitoring:
                         '--tcp-ports', '9095'
                     ],
                     stdin=json.dumps({"files": {"prometheus.yml": y,
-                                                "/etc/prometheus/alerting/custom_alerts.yml": ""}}),
+                                                "/etc/prometheus/alerting/custom_alerts.yml": ""}, 'retention_time': '15d'}),
                     image='')
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
index 72f3efa2e9132446cb3bd8811312d0f4c2974852..3f8bd315a394365d5e875ee86a87be6f783e7559 100644 (file)
@@ -519,7 +519,7 @@ class ServiceSpec(object):
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
-            'prometheus': MonitoringSpec,
+            'prometheus': PrometheusSpec,
             'loki': MonitoringSpec,
             'promtail': MonitoringSpec,
             'snmp-gateway': SNMPGatewaySpec,
@@ -1255,6 +1255,33 @@ class GrafanaSpec(MonitoringSpec):
 yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer)
 
 
+class PrometheusSpec(MonitoringSpec):
+    def __init__(self,
+                 service_type: str = 'prometheus',
+                 service_id: Optional[str] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
+                 retention_time: Optional[str] = None,
+                 extra_container_args: Optional[List[str]] = None,
+                 custom_configs: Optional[List[CustomConfig]] = None,
+                 ):
+        assert service_type == 'prometheus'
+        super(PrometheusSpec, self).__init__(
+            'prometheus', service_id=service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config, networks=networks, port=port,
+            extra_container_args=extra_container_args, custom_configs=custom_configs)
+
+        self.retention_time = retention_time
+
+
+yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer)
+
+
 class SNMPGatewaySpec(ServiceSpec):
     class SNMPVersion(str, enum.Enum):
         V2c = 'V2c'