]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: allow setting prometheus retention time
authorAdam King <adking@redhat.com>
Mon, 22 Aug 2022 15:14:12 +0000 (11:14 -0400)
committerAdam King <adking@redhat.com>
Wed, 14 Sep 2022 19:52:25 +0000 (15:52 -0400)
When we deploy Prometheus server, we don't provide any
ability to define the tsdb retention time - so it defaults to 15d.

This change adds a field that can be passed in a prometheus service
spec that will be passed as an arg to the --storage.tsdb.retention.time
parameter for the prometheus daemon.

Fixes: https://tracker.ceph.com/issues/54308
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 91dd03fd648d25773a83fdad311b62b781619fc4)

Conflicts:
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

src/cephadm/cephadm
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index 46c3d7fce5e8d65b726447ff9983e2d96835e40f..c91f1dc736beb7ea85d365d0d6ce53d42ededf5d 100755 (executable)
@@ -2376,6 +2376,9 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
                     port = meta['ports'][0]
             r += [f'--web.listen-address={ip}:{port}']
             if daemon_type == 'prometheus':
+                config = get_parm(ctx.config_json)
+                retention_time = config.get('retention_time', '15d')
+                r += [f'--storage.tsdb.retention.time={retention_time}']
                 scheme = 'http'
                 host = get_fqdn()
                 r += [f'--web.external-url={scheme}://{host}:{port}']
index d513dcb898291d0b70fdf3e421b7ef987abbe5c0..c57963d4dc9ad67a6c1bc45b6076a97e7ed6abb4 100644 (file)
@@ -1095,6 +1095,7 @@ class TestMonitoring(object):
 
     def test_prometheus_external_url(self):
         ctx = cd.CephadmContext()
+        ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
         daemon_type = 'prometheus'
         daemon_id = 'home'
         fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
index 9c2a08b6d70e95def0cf6e78a8fb42d0212677ac..ef47d2cf932a2bf1005fcc9328e3f953b23ab7f6 100644 (file)
@@ -9,7 +9,8 @@ from urllib.parse import urlparse
 from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+    SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
 from cephadm.services.ingress import IngressSpec
 from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
@@ -267,6 +268,13 @@ class PrometheusService(CephadmService):
         assert self.TYPE == daemon_spec.daemon_type
         deps = []  # type: List[str]
 
+        prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+        try:
+            retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d'
+        except AttributeError:
+            retention_time = '15d'
+
         # scrape mgrs
         mgr_scrape_list = []
         mgr_map = self.mgr.get('mgr_map')
@@ -339,12 +347,13 @@ class PrometheusService(CephadmService):
             'haproxy_targets': haproxy_targets,
             'nodes': nodes,
         }
-        r = {
+        r: Dict[str, Any] = {
             'files': {
                 'prometheus.yml':
                     self.mgr.template.render(
                         'services/prometheus/prometheus.yml.j2', context)
-            }
+            },
+            'retention_time': retention_time
         }
 
         # include alerts, if present in the container
index d1945d37227b04fc263ab0bb77eadb015674465f..5eac052d08aa804189f0b53263988548565f2100 100644 (file)
@@ -425,7 +425,7 @@ class TestMonitoring:
                         '--config-json', '-',
                         '--tcp-ports', '9095'
                     ],
-                    stdin=json.dumps({"files": {"prometheus.yml": y}}),
+                    stdin=json.dumps({"files": {"prometheus.yml": y}, 'retention_time': '15d'}),
                     image='')
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
index e105ea8ebdff4aab813f0f3687563fb85f0197af..17130ea9a379c83b6d8286c9499117144fea4e61 100644 (file)
@@ -466,7 +466,7 @@ class ServiceSpec(object):
             'container': CustomContainerSpec,
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
-            'prometheus': MonitoringSpec,
+            'prometheus': PrometheusSpec,
             'snmp-gateway': SNMPGatewaySpec,
         }.get(service_type, cls)
         if ret == ServiceSpec and not service_type:
@@ -1180,6 +1180,32 @@ class GrafanaSpec(MonitoringSpec):
 yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer)
 
 
+class PrometheusSpec(MonitoringSpec):
+    def __init__(self,
+                 service_type: str = 'prometheus',
+                 service_id: Optional[str] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
+                 retention_time: Optional[str] = None,
+                 extra_container_args: Optional[List[str]] = None,
+                 ):
+        assert service_type == 'prometheus'
+        super(PrometheusSpec, self).__init__(
+            'prometheus', service_id=service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config, networks=networks, port=port,
+            extra_container_args=extra_container_args)
+
+        self.retention_time = retention_time
+
+
+yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer)
+
+
 class SNMPGatewaySpec(ServiceSpec):
     class SNMPVersion(str, enum.Enum):
         V2c = 'V2c'