Add CLI commands to add/remove the remote_write section in the Prometheus configuration template
Fixes: https://tracker.ceph.com/issues/76316
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
self.daemon_action(action='redeploy', daemon_name=daemon.daemon_name)
return 'prometheus multi-cluster targets updated'
+    @handle_orch_error
+    def set_prometheus_remote_write(self, url: str, remote_write_allowed_metrics: List[str]) -> str:
+        """Store the Prometheus remote-write URL and metric allow-list on the spec.
+
+        The URL and the ``|``-joined metric names are written to the
+        prometheus service spec, which is then re-applied.
+
+        :param url: http or https endpoint; surrounding whitespace is dropped
+            before validation and storage.
+        :param remote_write_allowed_metrics: metric names to allow. They are
+            joined with ``|`` and stored as a single string. A bare string is
+            treated as a one-element list.
+        :return: a human-readable status or validation message.
+        """
+        # Validate and store the same (stripped) value so stray whitespace
+        # never ends up in the persisted spec.
+        url = url.strip() if url else ''
+        if not url:
+            return 'Invalid URL. URL cannot be empty.'
+
+        try:
+            parsed_url = urlparse(url)
+            host = parsed_url.hostname
+        except ValueError as e:
+            return f'Invalid url. {str(e)}'
+
+        if parsed_url.scheme not in ('http', 'https'):
+            return 'Invalid URL. Scheme must be http or https.'
+        if not host:
+            return 'Invalid URL. Hostname is missing.'
+
+        # Joining a bare string would split it into single characters.
+        if isinstance(remote_write_allowed_metrics, str):
+            remote_write_allowed_metrics = [remote_write_allowed_metrics]
+        metrics = '|'.join(
+            name.strip()
+            for name in (remote_write_allowed_metrics or [])
+            if name and name.strip()
+        )
+        if not metrics:
+            # With an empty allow-list the config template emits no
+            # remote_write section, so reporting success would be misleading.
+            return 'Invalid metrics. At least one allowed metric is required.'
+
+        prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
+        if not prometheus_spec:
+            return "Service prometheus not found\n"
+
+        # Only short-circuit when nothing changes; a different metric list for
+        # the same URL must still be applied.
+        if (url == prometheus_spec.remote_write_url
+                and metrics == prometheus_spec.remote_write_allowed_metrics):
+            return f"Remote write URL '{url}' already exists.\n"
+
+        prometheus_spec.remote_write_url = url
+        prometheus_spec.remote_write_allowed_metrics = metrics
+
+        spec = ServiceSpec.from_json(prometheus_spec.to_json())
+        self.apply([spec], no_overwrite=False)
+
+        return 'prometheus remote write updated'
+
+    @handle_orch_error
+    def remove_prometheus_remote_write(self, url: str) -> str:
+        """Clear the Prometheus remote-write settings if ``url`` is the stored one.
+
+        :param url: the remote-write URL expected to be currently configured.
+        :return: a human-readable status or validation message.
+        """
+        if not url or not url.strip():
+            return 'Invalid URL. URL cannot be empty.'
+
+        prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
+        # Guard before any attribute access; checking afterwards could never
+        # catch a missing spec.
+        if not prometheus_spec:
+            return "Service prometheus not found\n"
+
+        if url != prometheus_spec.remote_write_url:
+            return f"Remote write URL '{url}' does not exist.\n"
+
+        prometheus_spec.remote_write_url = ''
+        prometheus_spec.remote_write_allowed_metrics = ''
+
+        spec = ServiceSpec.from_json(prometheus_spec.to_json())
+        self.apply([spec], no_overwrite=False)
+        return 'prometheus remote write removed'
+
@handle_orch_error
def set_alertmanager_access_info(self, user: str, password: str) -> str:
self.set_store(AlertmanagerService.USER_CFG_KEY, user)
retention_time = get_field_from_spec(spec, 'retention_time', '15d')
retention_size = get_field_from_spec(spec, 'retention_size', '0')
targets = get_field_from_spec(spec, 'targets', [])
+ remote_write_url = get_field_from_spec(spec, 'remote_write_url', '')
+ remote_write_allowed_metrics = get_field_from_spec(spec, 'remote_write_allowed_metrics', '')
# build service discovery end-point
security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
'service_discovery_password': self.mgr.http_server.service_discovery.password,
'service_discovery_cfg': self.get_service_discovery_cfg(security_enabled, mgmt_gw_enabled),
'external_prometheus_targets': targets,
+ 'remote_write_url': remote_write_url,
+ 'remote_write_allowed_metrics': remote_write_allowed_metrics,
'cluster_fsid': self.mgr._cluster_fsid,
'clusters_credentials': cluster_credentials,
'federate_path': federate_path
# Ceph mgrs are dependency because when mgmt-gateway is not enabled the service-discovery depends on mgrs ips
deps += mgr.cache.get_daemons_by_types(['mgr'])
+ if spec:
+ prometheus_spec = cast(PrometheusSpec, spec)
+
+ deps.append(f'remote_write_url:{prometheus_spec.remote_write_url}')
+ deps.append(f'remote_write_metrics:{prometheus_spec.remote_write_allowed_metrics}')
+
return sorted(deps)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
static_configs:
- targets: ['{{ url }}']
{% endfor %}
+
+{# Render remote_write only when both values are set; truthiness also skips None from an unset spec field. NOTE(review): insecure_skip_verify below disables TLS certificate verification - confirm this is intended. #}{% if remote_write_url and remote_write_allowed_metrics %}
+remote_write:
+ - url: {{ remote_write_url }}/api/v1/write
+ tls_config:
+ insecure_skip_verify: true
+ write_relabel_configs:
+ - source_labels: [__name__]
+ regex: '^({{ remote_write_allowed_metrics }})$'
+ action: keep
+ - source_labels: [__name__]
+ regex: 'ALERTS|ALERTS_FOR_STATE'
+ action: drop
+{% endif %}
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+
""").lstrip()
_run_cephadm.assert_called_with(
"use_url_prefix": False
},
}),
- error_ok=True,
use_current_daemon_image=False,
+ error_ok=True,
)
@patch("cephadm.module.CephadmOrchestrator.get_unique_name")
key_file: prometheus.key
+
""").lstrip()
_run_cephadm.assert_called_with(
'web_config': '/etc/prometheus/web.yml'
},
}),
- error_ok=True,
use_current_daemon_image=False,
+ error_ok=True,
)
@patch("cephadm.serve.CephadmServe._run_cephadm")
import tempfile
import time
from datetime import datetime
-from typing import NamedTuple, Optional
+from typing import List, NamedTuple, Optional
import requests
return self.alert_proxy('GET', f'/alerts/groups?filter=cluster={fsid}', params)
return self.alert_proxy('GET', '/alerts/groups', params)
+    @RESTController.Collection(method='PUT', path='/set_remote_write')
+    def set_remote_write(self, remote_write_url: str, remote_write_allowed_metrics: List[str]):
+        # Hand the request straight to the orchestrator's monitoring client
+        # and return whatever it reports.
+        monitoring = OrchClient.instance().monitoring
+        return monitoring.set_prometheus_remote_write(
+            remote_write_url, remote_write_allowed_metrics)
+
+    @RESTController.Collection(method='PUT', path='/remove_remote_write')
+    def remove_remote_write(self, url: str):
+        # Hand the request straight to the orchestrator's monitoring client
+        # and return whatever it reports.
+        monitoring = OrchClient.instance().monitoring
+        return monitoring.remove_prometheus_remote_write(url)
+
@RESTController.Collection(method='GET', path='/prometheus_query_data')
def get_prometeus_query_data(self, **params):
params['query'] = params.pop('params')
- jwt: []
tags:
- Prometheus
+ # PUT endpoint whose JSON body carries one required string property, `url`.
+ /api/prometheus/remove_remote_write:
+ put:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ properties:
+ url:
+ type: string
+ required:
+ - url
+ type: object
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ type: object
+ application/vnd.ceph.api.v1.0+json:
+ schema:
+ type: object
+ description: Resource updated.
+ '202':
+ content:
+ application/json:
+ schema:
+ type: object
+ application/vnd.ceph.api.v1.0+json:
+ schema:
+ type: object
+ description: Operation is still executing. Please check the task queue.
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ tags:
+ - Prometheus
/api/prometheus/rules:
get:
parameters: []
- jwt: []
tags:
- Prometheus
+ # PUT endpoint whose JSON body carries two required properties:
+ # `remote_write_url` and `remote_write_allowed_metrics`.
+ /api/prometheus/set_remote_write:
+ put:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ properties:
+ # NOTE(review): the handler added in this change annotates this parameter
+ # as List[str], but the schema declares a plain string - confirm which is
+ # intended.
+ remote_write_allowed_metrics:
+ type: string
+ remote_write_url:
+ type: string
+ required:
+ - remote_write_url
+ - remote_write_allowed_metrics
+ type: object
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ type: object
+ application/vnd.ceph.api.v1.0+json:
+ schema:
+ type: object
+ description: Resource updated.
+ '202':
+ content:
+ application/json:
+ schema:
+ type: object
+ application/vnd.ceph.api.v1.0+json:
+ schema:
+ type: object
+ description: Operation is still executing. Please check the task queue.
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ tags:
+ - Prometheus
/api/prometheus/silence:
post:
parameters: []
"""Get security config information"""
return self.api.get_security_config()
+    @wait_api_result
+    def set_prometheus_remote_write(self, remote_write_url: str,
+                                    remote_write_allowed_metrics: List[str]) -> str:
+        """Pass the remote write URL and allowed metrics through to ``self.api``."""
+        completion = self.api.set_prometheus_remote_write(
+            remote_write_url, remote_write_allowed_metrics)
+        return completion
+
+    @wait_api_result
+    def remove_prometheus_remote_write(self, remote_write_url: str) -> str:
+        """Pass the remote write URL to remove through to ``self.api``."""
+        completion = self.api.remove_prometheus_remote_write(remote_write_url)
+        return completion
+
class OrchClient(object):
"""remove prometheus target for multi-cluster"""
raise NotImplementedError()
+    def set_prometheus_remote_write(
+            self,
+            url: str,
+            remote_write_allowed_metrics: List[str]
+    ) -> OrchResult[str]:
+        """Set the Prometheus remote write URL and its allowed metrics.
+
+        This definition only raises ``NotImplementedError``.
+        """
+        raise NotImplementedError()
+
+    def remove_prometheus_remote_write(
+            self,
+            url: str
+    ) -> OrchResult[str]:
+        """Remove the Prometheus remote write URL and its allowed metrics.
+
+        This definition only raises ``NotImplementedError``.
+        """
+        raise NotImplementedError()
+
def get_alertmanager_access_info(self) -> OrchResult[Dict[str, str]]:
"""get alertmanager access information"""
raise NotImplementedError()
result = raise_if_exception(completion)
return HandleCommandResult(stdout=json.dumps(result))
+    @OrchestratorCLICommand.Write('orch prometheus set-remote-write')
+    def _set_prometheus_remote_write(self, url: str, remote_write_allowed_metrics: List[str]) -> HandleCommandResult:
+        # Run the orchestrator call, surface any error it carries, and emit
+        # the result JSON-encoded on stdout.
+        outcome = raise_if_exception(
+            self.set_prometheus_remote_write(url, remote_write_allowed_metrics))
+        return HandleCommandResult(stdout=json.dumps(outcome))
+
+    @OrchestratorCLICommand.Write('orch prometheus remove-remote-write')
+    def _remove_prometheus_remote_write(self, url: str) -> HandleCommandResult:
+        # Run the orchestrator call, surface any error it carries, and emit
+        # the result JSON-encoded on stdout.
+        outcome = raise_if_exception(self.remove_prometheus_remote_write(url))
+        return HandleCommandResult(stdout=json.dumps(outcome))
+
@OrchestratorCLICommand.Write('orch alertmanager set-credentials')
def _set_alertmanager_access_info(self, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> HandleCommandResult:
try:
preview_only: bool = False,
networks: Optional[List[str]] = None,
targets: Optional[List[str]] = None,
+ remote_write_url: Optional[str] = None,
+ remote_write_allowed_metrics: Optional[str] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
#: :ref:`cephadm-rgw-networks` and :ref:`cephadm-mgr-networks`.
self.networks: List[str] = networks or []
self.targets: List[str] = targets or []
+ self.remote_write_url = remote_write_url
+ self.remote_write_allowed_metrics = remote_write_allowed_metrics
self.config: Optional[Dict[str, str]] = None
if config:
preview_only: bool = False,
port: Optional[int] = None,
targets: Optional[List[str]] = None,
+ remote_write_url: Optional[str] = None,
+ remote_write_allowed_metrics: Optional[str] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
preview_only=preview_only, config=config,
networks=networks, extra_container_args=extra_container_args,
extra_entrypoint_args=extra_entrypoint_args,
- custom_configs=custom_configs, targets=targets)
+ custom_configs=custom_configs, targets=targets,
+ remote_write_url=remote_write_url,
+ remote_write_allowed_metrics=remote_write_allowed_metrics)
self.service_type = service_type
self.port = port
retention_time: Optional[str] = None,
retention_size: Optional[str] = None,
targets: Optional[List[str]] = None,
+ remote_write_url: Optional[str] = None,
+ remote_write_allowed_metrics: Optional[str] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
ssl=ssl, certificate_source=certificate_source,
preview_only=preview_only, config=config, networks=networks, port=port, targets=targets,
extra_container_args=extra_container_args, extra_entrypoint_args=extra_entrypoint_args,
- custom_configs=custom_configs)
+ custom_configs=custom_configs, remote_write_url=remote_write_url,
+ remote_write_allowed_metrics=remote_write_allowed_metrics)
self.retention_time = retention_time.strip() if retention_time else None
self.retention_size = retention_size.strip() if retention_size else None