]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/zabbix: Send max, min and avg PGs of OSDs to Zabbix 21043/head
author: Wido den Hollander <wido@42on.com>
Mon, 26 Mar 2018 11:27:27 +0000 (13:27 +0200)
committer: Wido den Hollander <wido@42on.com>
Mon, 26 Mar 2018 11:27:27 +0000 (13:27 +0200)
We already send the max, min and avg fill ratio of OSDs, but
knowing the highest number of PGs on a single OSD is also useful.

This allows admins to create a trigger that fires when there
is an OSD with too many PGs.

This could happen if a lot of OSDs fail and PGs start to move, filling
up one or more OSDs with many PGs.

As PGs consume CPU and memory, people usually like to watch out for these
situations.

Signed-off-by: Wido den Hollander <wido@42on.com>
src/pybind/mgr/zabbix/module.py
src/pybind/mgr/zabbix/zabbix_template.xml

index 84631cdc67b82544e744d127750bce98add34847..5377162b39f38226f4b6d854099df35f6d52ace2 100644 (file)
@@ -172,6 +172,7 @@ class Module(MgrModule):
         data['num_osd_in'] = num_in
 
         osd_fill = list()
+        osd_pgs = list()
         osd_apply_latency_ns = list()
         osd_commit_latency_ns = list()
 
@@ -180,6 +181,7 @@ class Module(MgrModule):
             if osd['kb'] == 0:
                 continue
             osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100)
+            osd_pgs.append(osd['num_pgs'])
             osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns'])
             osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns'])
 
@@ -187,6 +189,9 @@ class Module(MgrModule):
             data['osd_max_fill'] = max(osd_fill)
             data['osd_min_fill'] = min(osd_fill)
             data['osd_avg_fill'] = avg(osd_fill)
+            data['osd_max_pgs'] = max(osd_pgs)
+            data['osd_min_pgs'] = min(osd_pgs)
+            data['osd_avg_pgs'] = avg(osd_pgs)
         except ValueError:
             pass
 
index 0ad40027210ea0635894b91c6141159ff03e2b39..a2d1988341c09bbb3b87a3ff8ad064a86b689480 100644 (file)
                     <valuemap/>
                     <logtimefmt/>
                 </item>
+                <item>
+                    <name>Ceph OSD max PGs</name>
+                    <type>2</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.osd_max_pgs</key>
+                    <delay>0</delay>
+                    <history>90</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>0</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Maximum amount of PGs on OSDs</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
+                <item>
+                    <name>Ceph OSD min PGs</name>
+                    <type>2</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.osd_min_pgs</key>
+                    <delay>0</delay>
+                    <history>90</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>0</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Minimum amount of PGs on OSDs</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
+                <item>
+                    <name>Ceph OSD avg PGs</name>
+                    <type>2</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.osd_avg_pgs</key>
+                    <delay>0</delay>
+                    <history>90</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>0</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Average amount of PGs on OSDs</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
                 <item>
                     <name>Ceph backfill full ratio</name>
                     <type>2</type>