When we deploy the Prometheus server, we don't provide any way to
set the TSDB retention time, so it always defaults to 15d.
This change adds a retention_time field to the prometheus service
spec; its value is passed as the argument to the
--storage.tsdb.retention.time parameter of the prometheus daemon.
Fixes: https://tracker.ceph.com/issues/54308
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 91dd03fd648d25773a83fdad311b62b781619fc4)
Conflicts:
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
port = meta['ports'][0]
r += [f'--web.listen-address={ip}:{port}']
if daemon_type == 'prometheus':
+ config = get_parm(ctx.config_json)
+ retention_time = config.get('retention_time', '15d')
+ r += [f'--storage.tsdb.retention.time={retention_time}']
scheme = 'http'
host = get_fqdn()
r += [f'--web.external-url={scheme}://{host}:{port}']
def test_prometheus_external_url(self):
ctx = cd.CephadmContext()
+ ctx.config_json = json.dumps({'files': {}, 'retention_time': '15d'})
daemon_type = 'prometheus'
daemon_id = 'home'
fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
from mgr_module import HandleCommandResult
from orchestrator import DaemonDescription
-from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, SNMPGatewaySpec
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+ SNMPGatewaySpec, PrometheusSpec
from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
from cephadm.services.ingress import IngressSpec
from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
daemon_spec: CephadmDaemonDeploySpec,
) -> Tuple[Dict[str, Any], List[str]]:
assert self.TYPE == daemon_spec.daemon_type
+
+ prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ try:
+ retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d'
+ except AttributeError:
+ retention_time = '15d'
+
deps = [] # type: List[str]
# scrape mgrs
'haproxy_targets': haproxy_targets,
'nodes': nodes,
}
- r = {
+ r: Dict[str, Any] = {
'files': {
'prometheus.yml':
self.mgr.template.render(
'services/prometheus/prometheus.yml.j2', context)
- }
+ },
+ 'retention_time': retention_time
}
# include alerts, if present in the container
'--tcp-ports', '9095'
],
stdin=json.dumps({"files": {"prometheus.yml": y,
- "/etc/prometheus/alerting/custom_alerts.yml": ""}}),
+ "/etc/prometheus/alerting/custom_alerts.yml": ""}, 'retention_time': '15d'}),
image='')
@patch("cephadm.serve.CephadmServe._run_cephadm")
'container': CustomContainerSpec,
'grafana': GrafanaSpec,
'node-exporter': MonitoringSpec,
- 'prometheus': MonitoringSpec,
+ 'prometheus': PrometheusSpec,
'loki': MonitoringSpec,
'promtail': MonitoringSpec,
'snmp-gateway': SNMPGatewaySpec,
yaml.add_representer(GrafanaSpec, ServiceSpec.yaml_representer)
+class PrometheusSpec(MonitoringSpec):
+ # Service spec for the 'prometheus' service type. Extends MonitoringSpec
+ # with one extra optional field, retention_time, which this patch forwards
+ # to the prometheus daemon's --storage.tsdb.retention.time argument.
+ def __init__(self,
+ service_type: str = 'prometheus',
+ service_id: Optional[str] = None,
+ placement: Optional[PlacementSpec] = None,
+ unmanaged: bool = False,
+ preview_only: bool = False,
+ config: Optional[Dict[str, str]] = None,
+ networks: Optional[List[str]] = None,
+ port: Optional[int] = None,
+ # Optional retention period string (e.g. '15d'). When left as None, the
+ # consumers shown in this patch fall back to '15d'.
+ retention_time: Optional[str] = None,
+ extra_container_args: Optional[List[str]] = None,
+ custom_configs: Optional[List[CustomConfig]] = None,
+ ):
+ # This spec class only accepts the 'prometheus' service type.
+ assert service_type == 'prometheus'
+ # Every option except retention_time is forwarded to MonitoringSpec; the
+ # service type is passed as the literal 'prometheus'.
+ super(PrometheusSpec, self).__init__(
+ 'prometheus', service_id=service_id,
+ placement=placement, unmanaged=unmanaged,
+ preview_only=preview_only, config=config, networks=networks, port=port,
+ extra_container_args=extra_container_args, custom_configs=custom_configs)
+
+ # Stored as given; no validation or defaulting is done in this constructor.
+ self.retention_time = retention_time
+
+
+yaml.add_representer(PrometheusSpec, ServiceSpec.yaml_representer)
+
+
class SNMPGatewaySpec(ServiceSpec):
class SNMPVersion(str, enum.Enum):
V2c = 'V2c'