]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: fix custom alertmanager webhooks 59889/head
authorAdam King <adking@redhat.com>
Thu, 19 Sep 2024 20:40:20 +0000 (16:40 -0400)
committerAdam King <adking@redhat.com>
Mon, 5 May 2025 19:21:26 +0000 (15:21 -0400)
Previously, we put the custom webhooks in the "default"
receiver. This didn't actually work, as alertmanager only sends
alerts to the default receiver if the alert does not match
any of the routes listed under it. This meant that if you wanted
alerts sent to the dashboard and also to a custom location, you
weren't able to do so with the template cephadm provided.

This also swaps to using "webhook_urls" instead of
"default_webhook_urls" for what should be in the spec,
but "default_webhook_urls" is kept working for backwards
compatibility.

Fixes: https://tracker.ceph.com/issues/68157
Signed-off-by: Adam King <adking@redhat.com>
doc/cephadm/services/monitoring.rst
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index ef987fd7bd32ffb476f1fe6e6243d531d1fff491..0eae8751c96f6698e649cd325186802185a55be6 100644 (file)
@@ -598,7 +598,7 @@ webhook urls like so:
     service_type: alertmanager
     spec:
       user_data:
-        default_webhook_urls:
+        webhook_urls:
         - "https://foo"
         - "https://bar"
 
index 5fe60c4a9bbd25fdddaaed7342d295fa26ebd6e2..f040218b305f3206a2b447b69ba67a1a7f28b950 100644 (file)
@@ -311,7 +311,7 @@ class AlertmanagerService(CephadmService):
 
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        default_webhook_urls: List[str] = []
+        webhook_urls: List[str] = []
 
         spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
         try:
@@ -321,7 +321,10 @@ class AlertmanagerService(CephadmService):
         user_data = spec.user_data
         if 'default_webhook_urls' in user_data and isinstance(
                 user_data['default_webhook_urls'], list):
-            default_webhook_urls.extend(user_data['default_webhook_urls'])
+            webhook_urls.extend(user_data['default_webhook_urls'])
+        if 'webhook_urls' in user_data and isinstance(
+                user_data['webhook_urls'], list):
+            webhook_urls.extend(user_data['webhook_urls'])
 
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
         if mgmt_gw_enabled:
@@ -340,7 +343,7 @@ class AlertmanagerService(CephadmService):
         context = {
             'security_enabled': security_enabled,
             'dashboard_urls': dashboard_urls,
-            'default_webhook_urls': default_webhook_urls,
+            'webhook_urls': webhook_urls,
             'snmp_gateway_urls': snmp_gateway_urls,
             'secure': secure,
         }
index b6955caf616b56e0dd706a4b19ab8591e3b189c6..671778601ed173982515a6934f37d645c042451a 100644 (file)
@@ -23,6 +23,14 @@ route:
       group_interval: 10s
       repeat_interval: 1h
       receiver: 'ceph-dashboard'
+{% if webhook_urls %}
+      continue: true
+    - group_by: ['alertname']
+      group_wait: 10s
+      group_interval: 10s
+      repeat_interval: 1h
+      receiver: 'custom-receiver'
+{% endif %}
 {% if snmp_gateway_urls %}
       continue: true
     - receiver: 'snmp-gateway'
@@ -36,7 +44,9 @@ route:
 receivers:
 - name: 'default'
   webhook_configs:
-{% for url in default_webhook_urls %}
+- name: 'custom-receiver'
+  webhook_configs:
+{% for url in webhook_urls %}
   - url: '{{ url }}'
 {% endfor %}
 - name: 'ceph-dashboard'
index cb1aa8bc1ae9c1e5ee970db20c7fdeb0a6a1bf75..88d0e6b4d4ec7b25392c225782b01fbb76b65cfe 100644 (file)
@@ -564,6 +564,8 @@ class TestMonitoring:
         receivers:
         - name: 'default'
           webhook_configs:
+        - name: 'custom-receiver'
+          webhook_configs:
         - name: 'ceph-dashboard'
           webhook_configs:
           - url: '{url}/api/prometheus_receiver'
@@ -711,6 +713,8 @@ class TestMonitoring:
                 receivers:
                 - name: 'default'
                   webhook_configs:
+                - name: 'custom-receiver'
+                  webhook_configs:
                 - name: 'ceph-dashboard'
                   webhook_configs:
                   - url: 'https://host_fqdn:29443/internal/dashboard/api/prometheus_receiver'
@@ -810,6 +814,8 @@ class TestMonitoring:
                 receivers:
                 - name: 'default'
                   webhook_configs:
+                - name: 'custom-receiver'
+                  webhook_configs:
                 - name: 'ceph-dashboard'
                   webhook_configs:
                   - url: 'http://{fqdn}:8080/api/prometheus_receiver'
@@ -864,6 +870,119 @@ class TestMonitoring:
                     use_current_daemon_image=False,
                 )
 
+    @pytest.mark.parametrize(
+        "user_data",
+        [
+            ({'webhook_urls': ['http://foo.com:9999', 'http://bar.com:1111']}),
+            ({'default_webhook_urls': ['http://bar.com:9999', 'http://foo.com:1111']}),
+            ({'default_webhook_urls': ['http://bar.com:9999', 'http://foo.com:1111'],
+              'webhook_urls': ['http://foo.com:9999', 'http://bar.com:1111']}),
+        ],
+    )
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("socket.getfqdn")
+    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash')
+    @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert')
+    @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey'))
+    def test_alertmanager_config_custom_webhook_urls(
+        self,
+        _get_fqdn,
+        _run_cephadm,
+        cephadm_module: CephadmOrchestrator,
+        user_data: Dict[str, List[str]]
+    ):
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
+        cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+        fqdn = 'host1.test'
+        _get_fqdn.return_value = fqdn
+
+        print(user_data)
+
+        urls = []
+        if 'default_webhook_urls' in user_data:
+            urls += user_data['default_webhook_urls']
+        if 'webhook_urls' in user_data:
+            urls += user_data['webhook_urls']
+        tab_over = ' ' * 18  # since we'll be inserting this into an indented string
+        webhook_configs_str = '\n'.join(f'{tab_over}- url: \'{u}\'' for u in urls)
+
+        with with_host(cephadm_module, 'test'):
+            with with_service(cephadm_module, AlertManagerSpec(user_data=user_data)):
+
+                y = dedent(f"""
+                # This file is generated by cephadm.
+                # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+                global:
+                  resolve_timeout: 5m
+                  http_config:
+                    tls_config:
+                      insecure_skip_verify: true
+
+                route:
+                  receiver: 'default'
+                  routes:
+                    - group_by: ['alertname']
+                      group_wait: 10s
+                      group_interval: 10s
+                      repeat_interval: 1h
+                      receiver: 'ceph-dashboard'
+                      continue: true
+                    - group_by: ['alertname']
+                      group_wait: 10s
+                      group_interval: 10s
+                      repeat_interval: 1h
+                      receiver: 'custom-receiver'
+
+                receivers:
+                - name: 'default'
+                  webhook_configs:
+                - name: 'custom-receiver'
+                  webhook_configs:
+{webhook_configs_str}
+                - name: 'ceph-dashboard'
+                  webhook_configs:
+                  - url: 'http://{fqdn}:8080/api/prometheus_receiver'
+                """).lstrip()
+
+                _run_cephadm.assert_called_with(
+                    'test',
+                    "alertmanager.test",
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps({
+                        "fsid": "fsid",
+                        "name": 'alertmanager.test',
+                        "image": '',
+                        "deploy_arguments": [],
+                        "params": {
+                            'tcp_ports': [9093, 9094],
+                        },
+                        "meta": {
+                            'service_name': 'alertmanager',
+                            'ports': [9093, 9094],
+                            'ip': None,
+                            'deployed_by': [],
+                            'rank': None,
+                            'rank_generation': None,
+                            'extra_container_args': None,
+                            'extra_entrypoint_args': None,
+                        },
+                        "config_blobs": {
+                            "files": {
+                                "alertmanager.yml": y,
+                            },
+                            'peers': [],
+                            "use_url_prefix": False,
+                            "ip_to_bind_to": "",
+                        }
+                    }),
+                    use_current_daemon_image=False,
+                    error_ok=True,
+                )
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("socket.getfqdn")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
index b2a089f50c00ec35dcff2ce80a11164318e7e368..e4c69370f922bea4d9c9bbf2ce27d13f7e3531cf 100644 (file)
@@ -2479,14 +2479,14 @@ class AlertManagerSpec(MonitoringSpec):
         # service_type: alertmanager
         # service_id: xyz
         # user_data:
-        #   default_webhook_urls:
+        #   webhook_urls:
         #   - "https://foo"
         #   - "https://bar"
         #
         # Documentation:
-        # default_webhook_urls - A list of additional URL's that are
-        #                        added to the default receivers'
-        #                        <webhook_configs> configuration.
+        # webhook_urls - A list of additional URL's that are
+        #                added to the 'custom-receiver' receiver's
+        #                <webhook_configs> configuration.
         self.user_data = user_data or {}
         self.secure = secure
         self.only_bind_port_on_networks = only_bind_port_on_networks