git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
monitoring: use ceph_cephadm_daemon_status in ceph-nvmeof [wip-vallari-test-29Sep-centos9-only]
author Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 13 Aug 2025 11:09:04 +0000 (16:39 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Mon, 29 Sep 2025 18:14:10 +0000 (23:44 +0530)
Use this metric to show accurate data for "down"
NVMeoF gateways in ceph NVMeoF Overview dashboard.

Fixes: https://tracker.ceph.com/issues/71384
Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/dashboards/ceph-nvmeof.libsonnet
monitoring/ceph-mixin/dashboards_out/ceph-nvmeof.json

index 4002142ceccb9fc2ba4009585953a97af313a02c..44d271e1646cd876837927f0681562a7e569cd12 100644 (file)
@@ -119,191 +119,164 @@ local g = import 'grafonnet/grafana.libsonnet';
       )
     ),
 
-    $.addStatPanel(
-      title='Total Gateways',
-      description='',
-      unit='',
-      datasource='$datasource',
-      gridPosition={ x: 3, y: 1, w: 5, h: 3 },
-      colorMode='background',
-      graphMode='none',
-      justifyMode='auto',
-      orientation='auto',
-      textMode='auto',
-      interval='1m',
-      color={ mode: 'thresholds' },
-      thresholdsMode='',
-      noValue=null,
-    ).addThresholds([
-      { color: '#808080', value: null },
-      { color: 'red', value: 1.0003 },
-    ])
-    .addTarget(
-      $.addTargetSchema(
-        expr="count(ceph_nvmeof_gateway_info) + sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
-        format='time_series',
-        instant=true,
-        legendFormat='Total',
-        range=false,
+      $.addStatPanel(
+        title='Total Gateways',
+        description='',
+        unit='',
         datasource='$datasource',
+        gridPosition={ x: 3, y: 1, w: 5, h: 3 },
+        colorMode="background",
+        graphMode="none",
+        justifyMode="auto",
+        orientation="auto",
+        textMode="auto",
+        interval='1m',
+        color={ mode: 'thresholds' },
+        thresholdsMode='',
+        noValue="0",
+      ).addThresholds([
+        { color: '#808080', value: null },
+        { color: 'red', value: 1.0003 },
+      ])
+      .addTarget(
+        $.addTargetSchema(
+          expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"})',
+          format='time_series',
+          instant=true,
+          legendFormat="Total",
+          range=false,
+          datasource='$datasource',
+        )
       )
-    )
-    .addTarget(
-      $.addTargetSchema(
-        expr='count(ceph_nvmeof_gateway_info)',
-        format='time_series',
-        instant=false,
-        legendFormat='Available',
-        range=true,
-        datasource='$datasource',
+      .addTarget(
+        $.addTargetSchema(
+          expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"}==1 or ceph_cephadm_daemon_status{service_type="nvmeof"}==2)',
+          format='time_series',
+          instant=true,
+          legendFormat="Available",
+          range=false,
+          datasource='$datasource',
+        )
       )
-    )
-    .addTarget(
-      $.addTargetSchema(
-        expr="(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
-        format='time_series',
-        instant=true,
-        legendFormat='Down',
-        range=false,
-        datasource='$datasource',
+      .addTarget(
+        $.addTargetSchema(
+          expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"}==0 or ceph_cephadm_daemon_status{service_type="nvmeof"}==-1 or ceph_cephadm_daemon_status{service_type="nvmeof"} == -2)',
+          format='time_series',
+          instant=true,
+          legendFormat="Down",
+          range=false,
+          datasource='$datasource',
+        )
       )
-    )
-    .addOverrides([
-      {
-        matcher: { id: 'byName', options: 'Down' },
-        properties: [
-          {
-            id: 'color',
-            value: {
-              fixedColor: 'red',
-              mode: 'fixed',
+      .addOverrides([
+        {
+          matcher: { id: 'byName', options: 'Down' },
+          properties: [
+            {
+              id: 'color',
+              value: { 
+                fixedColor: 'red', 
+                mode: 'fixed' 
+              },
             },
-          },
-        ],
-      },
-      {
-        matcher: { id: 'byName', options: 'Total' },
-        properties: [
-          {
-            id: 'color',
-            value: {
-              fixedColor: '#a7a38b',
-              mode: 'fixed',
+          ],
+        },
+        {
+          matcher: { id: 'byName', options: 'Total' },
+          properties: [
+            {
+              id: 'color',
+              value: { 
+                fixedColor: '#a7a38b', 
+                mode: 'fixed' 
+              },
             },
-          },
-        ],
-      },
-      {
-        matcher: { id: 'byName', options: 'Available' },
-        properties: [
-          {
-            id: 'color',
-            value: {
-              fixedColor: 'green',
-              mode: 'fixed',
+          ],
+        },
+        {
+          matcher: { id: 'byName', options: 'Available' },
+          properties: [
+            {
+              id: 'color',
+              value: { 
+                fixedColor: 'green', 
+                mode: 'fixed' 
+              },
             },
-          },
-        ],
-      },
-    ]),
+          ],
+        },
+      ]),
 
-    $.timeSeriesPanel(
-      title='Ceph Health NVMeoF WARNING',
-      description='Ceph healthchecks NVMeoF WARNINGs',
-      gridPosition={ x: 8, y: 1, w: 7, h: 8 },
-      lineInterpolation='linear',
-      lineWidth=1,
-      drawStyle='line',
-      axisPlacement='auto',
-      datasource='$datasource',
-      fillOpacity=5,
-      pointSize=5,
-      showPoints='auto',
-      unit='none',
-      displayMode='list',
-      showLegend=true,
-      placement='bottom',
-      tooltip={ mode: 'multi', sort: 'desc' },
-      stackingMode='normal',
-      spanNulls=false,
-      decimals=0,
-      thresholdsMode='absolute',
-      noValue='0',
-    ).addThresholds([
-      { color: 'green', value: null },
-    ])
-    .addTarget(
-      $.addTargetSchema(
-        expr="sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
-        format='',
-        instant=false,
-        legendFormat='NVMEOF_GATEWAY_DOWN',
-        range=true,
+      $.timeSeriesPanel( 
+        title='Gateway WARNING Health States',
+        description='Gateways in error states',
+        gridPosition={ x: 8, y: 1, w: 7, h: 8 },
+        lineInterpolation='linear',
+        lineWidth=1,
+        drawStyle='line',
+        axisPlacement='auto',
         datasource='$datasource',
+        fillOpacity=5,
+        pointSize=5,
+        showPoints='auto',
+        unit='none',
+        displayMode='list',
+        showLegend=false,
+        placement='bottom',
+        tooltip={'hideZeros': true, 'mode': 'multi', 'sort': 'desc'},
+        stackingMode='normal',
+        spanNulls=false,
+        decimals=0,
+        thresholdsMode='absolute',
+        noValue=0,
+      ).addThresholds([
+        { color: 'green', value: null },
+      ])
+      .addTarget(
+        $.addTargetSchema(
+          expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == 0)',
+          format='',
+          instant=false,
+          legendFormat="stopped - {{ daemon_name }} ",
+          range=true,
+          datasource='$datasource',
+        )
       )
-    )
-    .addTarget(
-      $.addTargetSchema(
-        expr="sum(ceph_health_detail{name='NVMEOF_GATEWAY_DELETING'})",
-        format='',
-        instant=false,
-        legendFormat='NVMEOF_GATEWAY_DELETING',
-        range=true,
-        datasource='$datasource',
+      .addTarget(
+        $.addTargetSchema(
+          expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == -1)',
+          format='',
+          instant=false,
+          legendFormat="error - {{ daemon_name }}",
+          range=true,
+          datasource='$datasource',
+        )
       )
-    )
-    .addTarget(
-      $.addTargetSchema(
-        expr="sum(ceph_health_detail{name='NVMEOF_SINGLE_GATEWAY'})",
-        format='',
-        instant=false,
-        legendFormat='NVMEOF_SINGLE_GATEWAY',
-        range=true,
-        datasource='$datasource',
+      .addTarget(
+        $.addTargetSchema(
+          expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == -2)',
+          format='',
+          instant=false,
+          legendFormat="unknown_state - {{ daemon_name }}",
+          range=true,
+          datasource='$datasource',
+        )
       )
-    )
-    .addOverrides([
-      {
-        matcher: { id: 'byName', options: 'NVMEOF_GATEWAY_DOWN' },
-        properties: [
-          {
-            id: 'color',
-            value: {
-              fixedColor: 'red',
-              mode: 'fixed',
-            },
-          },
-        ],
-      },
-      {
-        matcher: { id: 'byName', options: 'NVMEOF_GATEWAY_DELETING' },
-        properties: [
-          {
-            id: 'color',
-            value: {
-              fixedColor: 'dark-purple',
-              mode: 'fixed',
+      .addOverrides([
+        {
+          matcher: { id: 'byType', options: 'number' },
+          properties: [
+            {
+              id: 'color',
+              value: { 
+                fixedColor: 'orange', 
+                mode: 'shades' 
+              },
             },
-          },
-        ],
-      },
-      {
-        matcher: { id: 'byName', options: 'NVMEOF_SINGLE_GATEWAY' },
-        properties: [
-          {
-            id: 'custom.lineWidth',
-            value: 1,
-          },
-          {
-            id: 'color',
-            value: {
-              fixedColor: 'super-light-orange',
-              mode: 'shades',
-            },
-          },
-        ],
-      },
-    ]),
+          ],
+        }
+        ]
+      ),
 
     $.addAlertListPanel(
       title='Active Alerts',
index 2112d7a75404da07301c2ae295a55c8dad398129..739668beb897cddeb37bc26f5337c569c1799a93 100644 (file)
                "decimals": 0,
                "links": [ ],
                "mappings": [ ],
+               "noValue": "0",
                "thresholds": {
                   "mode": "",
                   "steps": [
          "targets": [
             {
                "datasource": "$datasource",
-               "expr": "count(ceph_nvmeof_gateway_info) + sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
+               "expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"})",
                "format": "time_series",
                "instant": true,
                "intervalFactor": 1,
             },
             {
                "datasource": "$datasource",
-               "expr": "count(ceph_nvmeof_gateway_info)",
+               "expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==1 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==2)",
                "format": "time_series",
-               "instant": false,
+               "instant": true,
                "intervalFactor": 1,
                "legendFormat": "Available",
-               "range": true,
+               "range": false,
                "refId": "B"
             },
             {
                "datasource": "$datasource",
-               "expr": "(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
+               "expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==0 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==-1 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -2)",
                "format": "time_series",
                "instant": true,
                "intervalFactor": 1,
       },
       {
          "datasource": "$datasource",
-         "description": "Ceph healthchecks NVMeoF WARNINGs",
+         "description": "Gateways in error states",
          "fieldConfig": {
             "defaults": {
                "color": {
                   }
                },
                "decimals": 0,
-               "noValue": "0",
+               "mappings": [ ],
+               "noValue": 0,
                "thresholds": {
                   "mode": "absolute",
                   "steps": [
             "overrides": [
                {
                   "matcher": {
-                     "id": "byName",
-                     "options": "NVMEOF_GATEWAY_DOWN"
-                  },
-                  "properties": [
-                     {
-                        "id": "color",
-                        "value": {
-                           "fixedColor": "red",
-                           "mode": "fixed"
-                        }
-                     }
-                  ]
-               },
-               {
-                  "matcher": {
-                     "id": "byName",
-                     "options": "NVMEOF_GATEWAY_DELETING"
-                  },
-                  "properties": [
-                     {
-                        "id": "color",
-                        "value": {
-                           "fixedColor": "dark-purple",
-                           "mode": "fixed"
-                        }
-                     }
-                  ]
-               },
-               {
-                  "matcher": {
-                     "id": "byName",
-                     "options": "NVMEOF_SINGLE_GATEWAY"
+                     "id": "byType",
+                     "options": "number"
                   },
                   "properties": [
-                     {
-                        "id": "custom.lineWidth",
-                        "value": 1
-                     },
                      {
                         "id": "color",
                         "value": {
-                           "fixedColor": "super-light-orange",
+                           "fixedColor": "orange",
                            "mode": "shades"
                         }
                      }
                "calcs": [ ],
                "displayMode": "list",
                "placement": "bottom",
-               "showLegend": true
+               "showLegend": false
             },
             "tooltip": {
+               "hideZeros": true,
                "mode": "multi",
                "sort": "desc"
             }
          "targets": [
             {
                "datasource": "$datasource",
-               "expr": "sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
+               "expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == 0)",
                "format": "",
                "instant": false,
                "intervalFactor": 1,
-               "legendFormat": "NVMEOF_GATEWAY_DOWN",
+               "legendFormat": "stopped - {{ daemon_name }} ",
                "range": true,
                "refId": "A"
             },
             {
                "datasource": "$datasource",
-               "expr": "sum(ceph_health_detail{name='NVMEOF_GATEWAY_DELETING'})",
+               "expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -1)",
                "format": "",
                "instant": false,
                "intervalFactor": 1,
-               "legendFormat": "NVMEOF_GATEWAY_DELETING",
+               "legendFormat": "error - {{ daemon_name }}",
                "range": true,
                "refId": "B"
             },
             {
                "datasource": "$datasource",
-               "expr": "sum(ceph_health_detail{name='NVMEOF_SINGLE_GATEWAY'})",
+               "expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -2)",
                "format": "",
                "instant": false,
                "intervalFactor": 1,
-               "legendFormat": "NVMEOF_SINGLE_GATEWAY",
+               "legendFormat": "unknown_state - {{ daemon_name }}",
                "range": true,
                "refId": "C"
             }
          ],
-         "title": "Ceph Health NVMeoF WARNING",
+         "title": "Gateway WARNING Health States",
          "type": "timeseries"
       },
       {
                   }
                },
                "decimals": 0,
+               "mappings": [ ],
                "noValue": "0",
                "thresholds": {
                   "mode": "absolute",