num_up = 0
num_in = 0
for osd in osd_map['osds']:
+ data['[osd.{0},up]'.format(int(osd['osd']))] = osd['up']
if osd['up'] == 1:
num_up += 1
+ data['[osd.{0},in]'.format(int(osd['osd']))] = osd['in']
if osd['in'] == 1:
num_in += 1
osd_stats = self.get('osd_stats')
for osd in osd_stats['osd_stats']:
- if osd['kb'] == 0:
+ try:
+ osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100)
+ data['[osd.{0},osd_fill]'.format(osd['osd'])] = (
+ float(osd['kb_used']) / float(osd['kb'])) * 100
+ except ZeroDivisionError:
continue
- osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100)
osd_pgs.append(osd['num_pgs'])
osd_apply_latency_ns.append(osd['perf_stat']['apply_latency_ns'])
osd_commit_latency_ns.append(osd['perf_stat']['commit_latency_ns'])
+ data['[osd.{0},num_pgs]'.format(osd['osd'])] = osd['num_pgs']
+ data[
+ '[osd.{0},osd_latency_apply]'.format(osd['osd'])
+ ] = osd['perf_stat']['apply_latency_ns'] / 1000000.0 # ns -> ms
+ data[
+ '[osd.{0},osd_latency_commit]'.format(osd['osd'])
+ ] = osd['perf_stat']['commit_latency_ns'] / 1000000.0 # ns -> ms
try:
data['osd_max_fill'] = max(osd_fill)
return False
def discovery(self):
- pools = self.get('osd_map')['pools']
- crush_rules = self.get('osd_map_crush')['rules']
+ osd_map = self.get('osd_map')
+ osd_map_crush = self.get('osd_map_crush')
+ # Discover Ceph pools: map each pool name to the item_name of the "take" step of its CRUSH rule
pool_discovery = {
pool['pool_name']: step['item_name']
- for pool in pools
- for rule in crush_rules if rule['rule_id'] == pool['crush_rule']
+ for pool in osd_map['pools']
+ for rule in osd_map_crush['rules'] if rule['rule_id'] == pool['crush_rule']
for step in rule['steps'] if step['op'] == "take"
}
-
- discovery_data = {"data": []}
- for pool, rule in pool_discovery.items():
- discovery_data["data"].append({
+ pools_discovery_data = {"data": [
+ {
"{#POOL}": pool,
"{#CRUSH_RULE}": rule
- })
-
- data = {"zabbix.discovery": json.dumps(discovery_data)}
+ }
+ for pool, rule in pool_discovery.items()
+ ]}
+
+ # Discover OSDs (two passes over the CRUSH buckets: "take" bucket -> child buckets -> their items)
+ # Map each "take" step's item_name to the ids of the items in the bucket it takes (presumably hosts - TODO confirm)
+ osd_roots = {
+ step['item_name']: [
+ item['id']
+ for item in root_bucket['items']
+ ]
+ for rule in osd_map_crush['rules']
+ for step in rule['steps'] if step['op'] == "take"
+ for root_bucket in osd_map_crush['buckets']
+ if root_bucket['id'] == step['item']
+ }
+ # Map the id of every item inside those child buckets (presumably OSDs) to the "take" item_name; NOTE(review): only two levels are walked - verify for deeper CRUSH trees
+ osd_discovery = {
+ item['id']: crush_rule
+ for crush_rule, roots in osd_roots.items()
+ for root in roots
+ for bucket in osd_map_crush['buckets']
+ if bucket['id'] == root
+ for item in bucket['items']
+ }
+ osd_discovery_data = {"data": [
+ {
+ "{#OSD}": osd,
+ "{#CRUSH_RULE}": rule
+ }
+ for osd, rule in osd_discovery.items()
+ ]}
+ # Prepare the discovered data for sending: one JSON document per discovery key
+ data = {
+ "zabbix.pool.discovery": json.dumps(pools_discovery_data),
+ "zabbix.osd.discovery": json.dumps(osd_discovery_data)
+ }
return bool(self.send(data))
def handle_command(self, inbuf, command):
if self.discovery():
return 0, 'Sending discovery data to Zabbix', ''
+ return 1, 'Failed to send discovery data to Zabbix', ''
+
else:
return (-errno.EINVAL, '',
"Command not found '{0}'".format(command['prefix']))
</item>
</items>
<discovery_rules>
+ <discovery_rule>
+ <name>Ceph OSD discovery</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.zabbix.osd.discovery</key>
+ <delay>0</delay>
+ <status>0</status>
+ <allowed_hosts/>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <filter>
+ <evaltype>0</evaltype>
+ <formula/>
+ <conditions/>
+ </filter>
+ <lifetime>90</lifetime>
+ <description/>
+ <item_prototypes>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD in</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},in]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>3</value_type>
+ <allowed_hosts/>
+ <units/>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD PGs</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},num_pgs]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>3</value_type>
+ <allowed_hosts/>
+ <units/>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD fill</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},osd_fill]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>0</value_type>
+ <allowed_hosts/>
+ <units>%</units>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD latency apply</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},osd_latency_apply]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>0</value_type>
+ <allowed_hosts/>
+ <units>ms</units>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD latency commit</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},osd_latency_commit]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>0</value_type>
+ <allowed_hosts/>
+ <units>ms</units>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ <item_prototype>
+ <name>[osd.{#OSD}] OSD up</name>
+ <type>2</type>
+ <snmp_community/>
+ <snmp_oid/>
+ <key>ceph.[osd.{#OSD},up]</key>
+ <delay>0</delay>
+ <history>90</history>
+ <trends>365</trends>
+ <status>0</status>
+ <value_type>3</value_type>
+ <allowed_hosts/>
+ <units/>
+ <delta>0</delta>
+ <snmpv3_contextname/>
+ <snmpv3_securityname/>
+ <snmpv3_securitylevel>0</snmpv3_securitylevel>
+ <snmpv3_authprotocol>0</snmpv3_authprotocol>
+ <snmpv3_authpassphrase/>
+ <snmpv3_privprotocol>0</snmpv3_privprotocol>
+ <snmpv3_privpassphrase/>
+ <formula>1</formula>
+ <delay_flex/>
+ <params/>
+ <ipmi_sensor/>
+ <authtype>0</authtype>
+ <username/>
+ <password/>
+ <publickey/>
+ <privatekey/>
+ <port/>
+ <description/>
+ <inventory_link>0</inventory_link>
+ <applications>
+ <application>
+ <name>Ceph</name>
+ </application>
+ </applications>
+ <valuemap/>
+ <logtimefmt/>
+ <preprocessing/>
+ <jmx_endpoint/>
+ <application_prototypes>
+ <application_prototype>
+ <name>Ceph {#CRUSH_RULE}</name>
+ </application_prototype>
+ </application_prototypes>
+ <master_item_prototype/>
+ </item_prototype>
+ </item_prototypes>
+ <trigger_prototypes>
+ <trigger_prototype>
+ <expression>{ceph-mgr Zabbix module:ceph.[osd.{#OSD},up].last()}=0</expression>
+ <recovery_mode>0</recovery_mode>
+ <recovery_expression/>
+ <name>Ceph OSD osd.{#OSD} is DOWN</name>
+ <correlation_mode>0</correlation_mode>
+ <correlation_tag/>
+ <url/>
+ <status>0</status>
+ <priority>2</priority>
+ <description/>
+ <type>0</type>
+ <manual_close>0</manual_close>
+ <dependencies/>
+ <tags/>
+ </trigger_prototype>
+ <trigger_prototype>
+ <expression>{ceph-mgr Zabbix module:ceph.[osd.{#OSD},osd_fill].last()}>={ceph-mgr Zabbix module:ceph.osd_full_ratio.last()}</expression>
+ <recovery_mode>0</recovery_mode>
+ <recovery_expression/>
+ <name>Ceph OSD osd.{#OSD} is full: {ITEM.VALUE}%</name>
+ <correlation_mode>0</correlation_mode>
+ <correlation_tag/>
+ <url/>
+ <status>0</status>
+ <priority>4</priority>
+ <description/>
+ <type>0</type>
+ <manual_close>0</manual_close>
+ <dependencies/>
+ <tags/>
+ </trigger_prototype>
+ <trigger_prototype>
+ <expression>{ceph-mgr Zabbix module:ceph.[osd.{#OSD},osd_fill].last()}>={ceph-mgr Zabbix module:ceph.osd_nearfull_ratio.last()}</expression>
+ <recovery_mode>0</recovery_mode>
+ <recovery_expression/>
+ <name>Ceph OSD osd.{#OSD} is near full: {ITEM.VALUE}%</name>
+ <correlation_mode>0</correlation_mode>
+ <correlation_tag/>
+ <url/>
+ <status>0</status>
+ <priority>2</priority>
+ <description/>
+ <type>0</type>
+ <manual_close>0</manual_close>
+ <dependencies/>
+ <tags/>
+ </trigger_prototype>
+ </trigger_prototypes>
+ <graph_prototypes/>
+ <host_prototypes/>
+ <jmx_endpoint/>
+ </discovery_rule>
<discovery_rule>
<name>Ceph pool discovery</name>
<type>2</type>
<snmp_community/>
<snmp_oid/>
- <key>ceph.zabbix.discovery</key>
+ <key>ceph.zabbix.pool.discovery</key>
<delay>0</delay>
<status>0</status>
<allowed_hosts/>