From 3551d7f8b36d883a72b85f0bd5568a33ac00e62c Mon Sep 17 00:00:00 2001 From: Patrick Seidensal Date: Thu, 25 Aug 2022 14:47:52 +0200 Subject: [PATCH] mgr/dashboard: enable addition of custom Prometheus alerts Fixes: https://tracker.ceph.com/issues/57294 Signed-off-by: Patrick Seidensal --- doc/cephadm/services/monitoring.rst | 10 ++++++++++ src/pybind/mgr/cephadm/services/monitoring.py | 17 +++++++++++++++++ src/pybind/mgr/cephadm/tests/test_services.py | 3 ++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/doc/cephadm/services/monitoring.rst b/doc/cephadm/services/monitoring.rst index c316da67f79..e64778ecceb 100644 --- a/doc/cephadm/services/monitoring.rst +++ b/doc/cephadm/services/monitoring.rst @@ -231,6 +231,7 @@ set``: - ``services/grafana/ceph-dashboard.yml`` - ``services/grafana/grafana.ini`` - ``services/prometheus/prometheus.yml`` +- ``services/prometheus/alerting/custom_alerts.yml`` - ``services/loki.yml`` - ``services/promtail.yml`` @@ -280,6 +281,15 @@ Example # reconfig the prometheus service ceph orch reconfig prometheus +.. code-block:: bash + + # set additional custom alerting rules for Prometheus + ceph config-key set mgr/cephadm/services/prometheus/alerting/custom_alerts.yml \ + -i $PWD/custom_alerts.yml + + # Note that custom alerting rules are not parsed by Jinja and hence escaping + # will not be an issue. + Deploying monitoring without cephadm ------------------------------------ diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index c98a8345db8..bcffa15f0eb 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -319,6 +319,23 @@ class PrometheusService(CephadmService): alerts = f.read() r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts + # Include custom alerts if present in key value store. This enables the + # users to add custom alerts. 
Write the file in any case, so that if the + # content of the key value store changed, that file is overwritten + # (emptied in case the value has been removed from the key value + # store). This prevents the necessity to adapt `cephadm` binary to + # remove the file. + # + # Don't use the template engine for it as + # + # 1. the alerts are always static and + # 2. they are a template themselves for the Go template engine, which + # uses curly braces and escaping that is cumbersome and unnecessary + # for the user. + # + r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \ + self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '') + return r, sorted(self.calculate_deps()) def calculate_deps(self) -> List[str]: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index d4d95da3aad..6b1afb8bc80 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -430,7 +430,8 @@ class TestMonitoring: '--config-json', '-', '--tcp-ports', '9095' ], - stdin=json.dumps({"files": {"prometheus.yml": y, "root_cert.pem": ''}}), + stdin=json.dumps({"files": {"prometheus.yml": y, "root_cert.pem": '', + "/etc/prometheus/alerting/custom_alerts.yml": ""}}), image='') @patch("cephadm.serve.CephadmServe._run_cephadm") -- 2.39.5