From: Vallari Agrawal Date: Mon, 18 Nov 2024 05:50:00 +0000 (+0530) Subject: monitoring: Add alert NVMeoFTooManyNamespaces X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=83748bf7bfbfb176c82660ccd17756cbde386837;p=ceph-ci.git monitoring: Add alert NVMeoFTooManyNamespaces NVMeoFTooManyNamespaces alerts the user if the total number of namespaces across subsystems exceeds 1024. Change the NVMeoFTooManySubsystems limit from 16 to 128. Resolves: rhbz#2324172 Fixes: https://github.com/ceph/ceph-nvmeof/issues/948 Signed-off-by: Vallari Agrawal (cherry picked from commit 614e14654dbdac474943b9c1f9e74afab131c94a) --- diff --git a/monitoring/ceph-mixin/config.libsonnet b/monitoring/ceph-mixin/config.libsonnet index 17b126d7eed..db1b8d1fae2 100644 --- a/monitoring/ceph-mixin/config.libsonnet +++ b/monitoring/ceph-mixin/config.libsonnet @@ -12,7 +12,8 @@ NVMeoFMaxGatewaysPerGroup: 8, NVMeoFMaxGatewaysPerCluster: 32, NVMeoFHighGatewayCPU: 80, - NVMeoFMaxSubsystemsPerGateway: 16, + NVMeoFMaxSubsystemsPerGateway: 128, + NVMeoFMaxNamespaces: 1024, NVMeoFHighClientCount: 32, NVMeoFHighHostCPU: 80, // diff --git a/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/monitoring/ceph-mixin/prometheus_alerts.libsonnet index fa2899b22c1..b64790c2255 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.libsonnet +++ b/monitoring/ceph-mixin/prometheus_alerts.libsonnet @@ -912,13 +912,23 @@ { alert: 'NVMeoFTooManySubsystems', 'for': '1m', - expr: 'count by(gateway_host) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*):.*")) > %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway], + expr: 'count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway], labels: { severity: 'warning', type: 'ceph_default' }, annotations: { - summary: 'The number of subsystems defined to the gateway exceeds supported values 
%(cluster)s' % $.MultiClusterSummary(), + summary: 'The number of subsystems defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(), description: 'Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported', }, }, + { + alert: 'NVMeoFTooManyNamespaces', + 'for': '1m', + expr: 'sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxNamespaces], + labels: { severity: 'warning', type: 'ceph_default' }, + annotations: { + summary: 'The number of namespaces defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(), + description: 'Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported', + }, + }, { alert: 'NVMeoFVersionMismatch', 'for': '1h', diff --git a/monitoring/ceph-mixin/prometheus_alerts.yml b/monitoring/ceph-mixin/prometheus_alerts.yml index 5e0b29e66a1..efd9b8d92f2 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.yml +++ b/monitoring/ceph-mixin/prometheus_alerts.yml @@ -839,8 +839,17 @@ groups: - alert: "NVMeoFTooManySubsystems" annotations: description: "Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported" - summary: "The number of subsystems defined to the gateway exceeds supported values " - expr: "count by(gateway_host) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*):.*\")) > 16.00" + summary: "The number of subsystems defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}" + expr: "count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 128.00" + for: "1m" + labels: + severity: "warning" + type: "ceph_default" + - alert: "NVMeoFTooManyNamespaces" + annotations: 
+ description: "Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported" + summary: "The number of namespaces defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}" + expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 1024.00" for: "1m" labels: severity: "warning" diff --git a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml index ad3a198355c..7e514c2120c 100644 --- a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml +++ b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml @@ -2454,49 +2454,273 @@ tests: summary: "Subsystem nqn.bad has been defined without host level security " description: "It is good practice to ensure subsystems use host security to reduce the risk of unexpected data loss" - # NVMeoFTooManySubsystems - - interval: 1m - input_series: - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn1"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn2"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn3"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn4"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn5"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn6"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn7"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn8"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn9"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn10"}' - values: 
'1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn11"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn12"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn13"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn14"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn15"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn16"}' - values: '1+0x10' - - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn17"}' - values: '1+0x10' - promql_expr_test: - - expr: count by(gateway_host) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*):.*")) > 16 +# NVMeoFTooManySubsystems + - interval: 1m + input_series: + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn1",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn2",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn3",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn4",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn5",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn6",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn7",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn8",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn9",cluster="mycluster"}' + values: 
'1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn10",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn11",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn12",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn13",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn14",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn15",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn16",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn17",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn18",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn19",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn20",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn21",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn22",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn23",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn24",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn25",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn26",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn27",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn28",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn29",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn30",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn31",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn32",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn33",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn34",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn35",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn36",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn37",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn38",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn39",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn40",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn41",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn42",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn43",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn44",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn45",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn46",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn47",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn48",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn49",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn50",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn51",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn52",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn53",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn54",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn55",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn56",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn57",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn58",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn59",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn60",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn61",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn62",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn63",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn64",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn65",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn66",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn67",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn68",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn69",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn70",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn71",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn72",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn73",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn74",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn75",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn76",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn77",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn78",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn79",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn80",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn81",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn82",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn83",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn84",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn85",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn86",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn87",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn88",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn89",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn90",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn91",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn92",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn93",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn94",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn95",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn96",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn97",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn98",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn99",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn100",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn101",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn102",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn103",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn104",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn105",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn106",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn107",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn108",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn109",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn110",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn111",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn112",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn113",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn114",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn115",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn116",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn117",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn118",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn119",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn120",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn121",cluster="mycluster"}' + values: '1+0x10' + - series: 
'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn122",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn123",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn124",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn125",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn126",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn127",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn128",cluster="mycluster"}' + values: '1+0x10' + - series: 'ceph_nvmeof_subsystem_metadata{instance="node-1:10008",nqn="nqn129",cluster="mycluster"}' + values: '1+0x10' + promql_expr_test: + - expr: count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*):.*")) > 128 eval_time: 1m exp_samples: - - labels: '{gateway_host="node-1"}' - value: 17 + - labels: '{gateway_host="node-1", cluster="mycluster"}' + value: 129 alert_rule_test: - eval_time: 5m alertname: NVMeoFTooManySubsystems @@ -2504,10 +2728,53 @@ tests: - exp_labels: gateway_host: node-1 severity: warning + cluster: mycluster + type: ceph_default + exp_annotations: + summary: "The number of subsystems defined to the gateway exceeds supported values on cluster mycluster" + description: "Although you may continue to create subsystems in node-1, the configuration may not be supported" + +# NVMeoFTooManyNamespaces + - interval: 1m + input_series: + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn1",cluster="mycluster"}' + values: '200+0x10' + - series: 
'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn2",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn3",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn4",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn5",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn6",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn7",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn8",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn9",cluster="mycluster"}' + values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn10",cluster="mycluster"}' + values: '200+0x10' + promql_expr_test: + - expr: sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*):.*")) > 1024 + eval_time: 1m + exp_samples: + - labels: '{gateway_host="node-1", cluster="mycluster"}' + value: 2000 + alert_rule_test: + - eval_time: 5m + alertname: NVMeoFTooManyNamespaces + exp_alerts: + - exp_labels: + gateway_host: node-1 + severity: warning + cluster: mycluster type: ceph_default exp_annotations: - summary: "The number of subsystems defined to the gateway exceeds supported values " - description: "Although you may continue to create subsystems in node-1, the configuration may not be supported" + summary: "The number of namespaces defined to the gateway exceeds supported values on cluster mycluster" + description: "Although you may continue to create namespaces in 
node-1, the configuration may not be supported" # NVMeoFVersionMismatch - interval: 1m