--- /dev/null
+#!/usr/bin/perl
+
+#******************************************************************************************
+#
+# NRPE DISK USAGE PLUGIN
+#
+# Program: Disk Usage plugin written to be used with Netsaint and NRPE
+# License: GPL
+# Copyright (c) 2000 Jeremy Hanmer (jeremy@newdream.net)
+#
+# Last Modified: 10/23/00
+#
+# Information: Basically, I wrote this because I had to deal with large numbers of
+# machines with a wide range of disk configurations, and with dynamically mounted
+# partitions. The basic check_disk plugin relied on a static configuration file which
+# doesn't lend itself to being used in a heterogeneous environment (especially when
+# you can't guarantee that the devices listed in the configuration file will be mounted).
+#
+# Bugs: Currently, this plugin only works on EXT2 partitions (although it's easy to change).
+#
+# Command Line: diskusage.pl <warning percentage> <critical percentage>
+#
+# Tested Systems: Mandrake 7.1/Intel, Debian 2.2/Intel, Debian 2.1/Intel
+#
+# License Information:
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+#*******************************************************************************************
+
+
+use strict;
+
+# Thresholds (percent used) come from the command line:
+#   diskusage.pl <warning percentage> <critical percentage>
+my $wrn = shift @ARGV;
+my $crt = shift @ARGV;
+
+# A missing threshold would be compared as 0 below and flag every filesystem
+# as critical, so refuse to run and report UNKNOWN (exit status 3) instead.
+unless ( defined $wrn && defined $crt && $wrn =~ /^\s*\d/ && $crt =~ /^\s*\d/ ) {
+    print "Usage: diskusage.pl <warning percentage> <critical percentage>\n";
+    exit 3;
+}
+
+my $output = '';    # text reported when a critical condition is found
+my $result = 0;     # plugin exit status
+my $warn   = 0;     # set when any filesystem crosses the warning threshold
+my $crit   = 0;     # set when any filesystem crosses the critical threshold
+my $hostname = `hostname`;
+chomp $hostname;
+
+# Every mounted filesystem except FUSE mounts.
+my @parts = `mount | grep -vi fuse`;
+
+# Run df with the given flags on one mount point and return its data lines
+# (header line removed).  The list form of open bypasses the shell, so the
+# mount point is passed to df verbatim.  Returns an empty list if df cannot
+# be started.
+sub df_lines {
+    my ($flags, $mount_point) = @_;
+    open(my $df, '-|', 'df', $flags, $mount_point) or return;
+    my @lines = <$df>;
+    close($df);
+    shift @lines;
+    return @lines;
+}
+
+for my $line (@parts) {
+    # Expected line format: <device> on <mount point> type <fstype> (<options>)
+    my (undef, undef, $mount, undef, $type) = split(/\s+/, $line);
+    next unless defined $mount && defined $type;
+    next if ($type eq 'nfs' && !($hostname eq 'zartan'));
+    next if ($type eq 'proc' || $type eq 'devpts');
+
+    # Block usage.  -P forces one output line per filesystem, so a long
+    # device name cannot wrap the line and shift the columns.
+    for my $row (df_lines('-kP', $mount)) {
+        my (undef, undef, undef, undef, $pc, $mnt) = split(/\s+/, $row);
+        next unless defined $pc && defined $mnt && $pc =~ /^(\d+)%/;
+        my $percent = $1;
+        # Mount points under /mnt/ are never critical; they stay at warning level.
+        if ( ($percent >= $wrn) && (!($percent >= $crt) || ($mnt =~ m/\/mnt\//)) ) {
+            $output .= "$mnt is at $pc ";
+            $warn = 1;
+        }
+        if ( ($percent >= $crt) && !($mnt =~ m/\/mnt\//) ) {
+            # The first critical finding discards the warning text collected so far.
+            $output = "" unless $crit;
+            $output .= "$mnt is at $pc ";
+            $crit = 1;
+        }
+    }
+
+    # Inode usage.  Filesystems that report "-" for IUse% are skipped.
+    for my $row (df_lines('-iP', $mount)) {
+        my (undef, undef, undef, undef, $pc, $mnt) = split(/\s+/, $row);
+        next unless defined $pc && defined $mnt && $pc =~ /^(\d+)%/;
+        my $percent = $1;
+        if ( ($percent >= $wrn) && !($percent >= $crt) ) {
+            $output .= "$mnt is at $pc inode usage ";
+            $warn = 1;
+        }
+        if ( ($percent >= $crt) && !($mnt =~ m/\/mnt\//) ) {
+            $output = "" unless $crit;
+            $output .= "$mnt is at $pc inode usage ";
+            $crit = 1;
+        }
+    }
+}
+
+# Only critical findings change the result.  Warning-level findings are
+# collected above, but the plugin still reports OK for them.
+if ($crit) {
+    print "$output\n";
+    $result = 2;
+}
+else {
+    print "Disks are OK now\n";
+}
+
+exit $result;
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+
+# State shared by all controller checks below.
+my ($warn, $crit);    # set to 1 when a check finds a warning / critical condition
+my $out;              # final status line
+my @out;              # individual problem messages
+my $devices;          # number of RAID devices counted by the checks
+my $derp;
+
+# lspci lines used to decide which controller checks to run: up to two
+# lines mentioning "raid" and one mentioning "scsi" (PATA entries excluded).
+my $pci  = qx{/usr/bin/lspci | /bin/grep -i raid | /bin/grep -v PATA | /usr/bin/head -2};
+my $scsi = qx{/usr/bin/lspci | /bin/grep -i scsi | /bin/grep -v PATA | /usr/bin/head -1};
+
+# software raid!
+# Parse /proc/mdstat.  For each "mdN : <status> ..." header the following
+# line carries the "[n/m] [UU_]" member summary, and the line after that may
+# carry a recovery/resync progress report.
+#   - recovery or resync in progress    -> warning
+#   - missing member ("_"), no rebuild  -> critical
+if (-e "/proc/mdstat") {
+    if (open(my $mdstat, '<', "/proc/mdstat")) {
+        while (my $line = <$mdstat>) {
+            next unless $line =~ /^(md\d+) : (\w+)/;
+            my ($dev, $status) = ($1, $2);
+            $devices++;
+
+            my $rest = <$mdstat>;
+            $rest = '' unless defined $rest;
+            my ($disks, $states) = $rest =~ /(\[.*\]) (\[.*\])/;
+            $disks  = '' unless defined $disks;
+            $states = '' unless defined $states;
+
+            my $degraded = ($states =~ /_/) ? 1 : 0;    # not all U
+            my $summary  = "$dev is $status $disks $states";
+
+            # A fresh lexical per device, so no text leaks from one array
+            # into the next array's message.
+            my $mout = $degraded ? $summary : '';
+
+            # possibly recovery / resync progress
+            my $next = <$mdstat>;
+            $next = '' unless defined $next;
+            if ($next =~ / (recovery|resync) =\s+(\S+%)/) {
+                my ($action, $per) = ($1, $2);
+                # Always name the device, even when every member shows "U".
+                $mout = $summary unless $mout;
+                $mout .= " $action $per";
+
+                # finish= normally sits on the progress line itself; the
+                # following line is consulted only as a fallback.
+                my ($finish) = $next =~ /finish=(.*?)min/;
+                if (!defined $finish && $action eq 'recovery') {
+                    my $extra = <$mdstat>;
+                    ($finish) = $extra =~ /finish=(.*?)min/ if defined $extra;
+                }
+                $mout .= " finish $finish min" if defined $finish;
+                $warn = 1;
+            } elsif ($degraded) {
+                $crit = 1;
+            }
+
+            push( @out, $mout ) if $mout;
+        }
+        close($mdstat);
+    } else {
+        # The file exists but cannot be read: say so rather than report OK.
+        $warn = 1;
+        push( @out, "can't open /proc/mdstat: $!" );
+    }
+}
+
+
+# mylex raid!
+# Runs only when the lspci RAID line mentions Mylex.  When /proc/rd/status
+# does not contain "OK", every /proc/rd/c* controller directory is scanned
+# for offline disks, logical drives and rebuilds in progress.
+if ($pci =~ /Mylex/i) {
+    my $s = `cat /proc/rd/status`;
+    chomp($s);
+    if ($s !~ /OK/) {
+        my @myinfo;
+        for my $ctl (`ls -d /proc/rd/c*`) {
+            chomp $ctl;
+            my %bad;    # logical drives seen without a rebuild in progress
+            my ($c) = $ctl =~ /\/(c\d)$/;
+            my $lastdevice;
+
+            if (open(my $status_fh, "$ctl/current_status")) {
+                while (my $line = <$status_fh>) {
+                    # disk status
+                    $lastdevice = $1 if $line =~ /^ (\d:\d) Vendor/;
+                    if ($line =~ / Disk Status: (\S+),/ && $1 ne 'Online') {
+                        push( @myinfo, "$c disk $lastdevice $1");
+                    }
+
+                    # logical drives
+                    if (my ($dev, $type, $status) = $line =~ / (\/dev\/rd\/\S+): (\S+), (\w+),/) {
+                        $devices++;
+                        $bad{$dev} = 1;
+                        push( @myinfo, "$dev ($type) $status") if $status ne 'Online';
+                    }
+
+                    # rebuild?
+                    if ($line =~ / Rebuild in Progress: .* \((\S+)\) (\d+%) completed/) {
+                        push( @myinfo, "$1 rebuild $2 complete" );
+                        delete $bad{$1};
+                    }
+                }
+            } else {
+                print "can't open $ctl/current_status\n";
+            }
+
+            if (keys %bad) {
+                $crit = 1;    # at least 1 is failed and !recovering
+            } else {
+                $warn = 1;    # all are recovering
+            }
+        }
+
+        push( @out, "Mylex $s: " . join(', ',@myinfo)) if @myinfo;
+    }
+}
+
+
+# icp vortex raid!
+# Runs when the lspci RAID line mentions Intel.  Each entry under
+# /proc/scsi/gdth is read as a sequence of sections.  A line starting with a
+# word character opens a section and its first word names it; the sections
+# handled here are Logical, Array, Host and Controller.
+#   - Logical drive whose status is not "ok"  -> warning
+#   - Array drive whose status is not "ready" -> warning
+#   - "Number" lines in the Host section are counted as devices
+#   - Controller section lines are reported verbatim
+# At most the five newest messages per entry are reported.
+if ( $pci =~ /intel/i) {
+    my @dev;
+    if (opendir(my $gdth, "/proc/scsi/gdth")) {
+        @dev = readdir($gdth);
+        closedir($gdth);
+    }
+
+    my @vortex;
+    for my $dev (@dev) {
+        next if $dev =~ /^\./;
+        open(my $proc, '<', "/proc/scsi/gdth/$dev") or next;
+        my @lines = <$proc>;
+        close($proc);
+
+        my $cur = '';    # current section name
+        my @myinfo;      # messages, newest first
+        for my $line (@lines) {
+            chomp $line;
+            if ($line =~ /^(\w+)/) {
+                # new section
+                $cur = $1;
+                next;
+            }
+            if ($cur eq 'Logical' || $cur eq 'Array') {
+                my ($num, $status) = $line =~ /Number:\s+(\d+)\s+Status:\s+(\w+)/;
+                next unless $status;
+                my $healthy = $cur eq 'Logical' ? 'ok' : 'ready';
+                if ($status ne $healthy) {
+                    $warn = 1;
+                    unshift( @myinfo, "$cur #$num $status" );
+                }
+            }
+            elsif ($cur eq 'Host') {
+                $devices++ if $line =~ /Number/;
+            }
+            elsif ($cur eq 'Controller') {
+                # Blank lines carry no information; skipping them keeps
+                # empty entries out of the report.
+                unshift( @myinfo, $line ) if $line =~ /\S/;
+            }
+        }
+
+        if (@myinfo) {
+            # Report only what exists (up to five entries), so the message
+            # has no empty ", , ," padding.
+            my $last = $#myinfo < 4 ? $#myinfo : 4;
+            push( @vortex, "dev $dev: " . join(', ', @myinfo[0 .. $last]) );
+        }
+    }
+
+    if (@vortex) {
+        push( @out, 'Vortex: ' . join('. ', @vortex) );
+    }
+}
+# SAS megaraid
+# Runs when the lspci RAID line mentions LSI Logic.  Every "State : ..."
+# line of "megacli -LDInfo" is one logical drive; any state other than
+# "Optimal" is critical.  When a physical drive is rebuilding, the rebuild
+# progress is reported; otherwise the failed virtual drive plus the State
+# line are reported.  Any line containing ERROR is critical as well.
+if ( $pci =~ /LSI\ Logic/i) {
+    my $read = `/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0`;
+    for my $line (split(/\n/,$read)) {
+        chomp $line;
+        # The line we care about is State: Optimal, if we don't have that, we've problems
+        if ($line =~ /^State\s*\:\s*(.*)/m) {
+            $devices++;
+            my $state = $1;
+            # Trailing blanks or a carriage return must not turn "Optimal"
+            # into a false alarm.
+            $state =~ s/\s+$//;
+            next unless $state;
+            if ($state ne 'Optimal') {
+                $crit = 1;
+                my $rebuild = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -i firmware`;
+                if ( $rebuild =~ /Rebuild/i) {
+                    # "<enclosure>:<slot>" of the rebuilding drive
+                    my $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
+                    my $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/egrep -i \'\(rebuild\|not found\)\'`;
+                    if ($rebuildstatus =~ /not found/m) {
+                        # check by device id instead of enclosure id if we get a not found error above
+                        $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /bin/grep -v position | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
+                        $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/grep -i rebuild`;
+                    }
+                    push(@out, $rebuildstatus);
+                } else {
+                    my $virtual=`/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0 | grep -i failed -B6 | grep -i virtual | cut -d'(' -f1`;
+                    push(@out, $virtual, $line);
+                }
+            }
+        }
+        # Catch the syntax or permission errors megacli prints.  The message
+        # is only queued here; it is printed once with the final status line.
+        if ($line =~ /ERROR/i) {
+            $crit = 1;
+            push(@out, $line);
+        }
+    }
+}
+
+# e3ware
+# Runs when the lspci RAID line mentions 3ware.  "tw_cli show" lists the
+# controllers (lines starting with cN); "tw_cli /cN show" lists units (uN)
+# and ports (pN).  Any unit or port whose status column is not "OK" is
+# critical; for a rebuilding unit the rebuild line of that unit is reported.
+if ( $pci =~ /3ware/i) {
+    $devices++;
+
+    my @controllers;
+    open(my $list, "/usr/bin/sudo /usr/sbin/tw_cli show|");
+    while (my $row = <$list>) {
+        next unless $row =~ /^c[0-9]/;
+        my ($c) = split(/\s+/, $row);
+        push(@controllers, $c);
+    }
+    close($list);
+
+    for my $cont (@controllers) {
+        open(my $detail, "/usr/bin/sudo /usr/sbin/tw_cli /$cont show|");
+        while (my $row = <$detail>) {
+            if ( $row =~ /^u[0-9]+/ ) {
+                my @info = split(/\s+/, $row);
+                if ( $info[2] ne 'OK' ) {
+                    $crit = 1;
+                    if ( $info[2] =~ /REBUILDING/i ) {
+                        my $rebuildstatus = `/usr/bin/sudo /usr/sbin/tw_cli /$cont/$info[0] show | /bin/grep REBUILD | /bin/grep -v RAID-10`;
+                        push(@out, $rebuildstatus);
+                    } else {
+                        push(@out, $row);
+                    }
+                }
+            }
+            if ( $row =~ /^p[0-9]+/ ) {
+                my @info = split(/\s+/, $row);
+                if ( $info[1] ne 'OK' ) {
+                    $crit = 1;
+                    push(@out, $row);
+                }
+            }
+        }
+    }
+}
+
+#Areca
+# Runs when the lspci RAID line mentions Areca.  Every "cli64 vsf info"
+# row that starts with two spaces and a number is one volume set; a row
+# that does not contain "Normal" is critical.
+
+if ( $pci =~ /areca/i) {
+    open(my $vsf, "sudo /usr/sbin/cli64 vsf info|");
+    while (my $row = <$vsf>) {
+        next unless $row =~ /^\ \ [0-9]+/;
+        $devices++;
+        next if $row =~ /Normal/i;
+        $crit = 1;
+        push(@out, $row);
+    }
+}
+
+# LSI MPT: runs when the lspci SCSI line mentions LSI Logic.  Only the first
+# line of mpt-status is inspected; its 11th whitespace-separated field must
+# be "OPTIMAL," or the result is critical.
+if ( $scsi =~ /LSI Logic/i) {
+    $devices++;
+    open(my $mpt, "sudo /usr/sbin/mpt-status | /usr/bin/head -1 |");
+    while (my $row = <$mpt>) {
+        next unless $row =~ /^ioc/;
+        my $state = (split(/\s+/, $row))[10];
+        next if $state eq 'OPTIMAL,';
+        $crit = 1;
+        push(@out, $row);
+    }
+}
+
+# show results
+# Exit status: 0 = OK, 1 = warning, 2 = critical (critical wins).
+my $result = 0;
+$result = 1 if $warn;
+$result = 2 if $crit;
+
+my $out = "No raid devices found $pci";
+$out = "All $devices raid devices happy as clams" if $devices;
+
+# Messages captured from the CLI tools still carry their newlines (some span
+# several lines).  Flatten each one so the whole report fits on the single
+# status line, and drop messages that turn out to be empty.
+if (@out) {
+    my @messages;
+    for my $message (@out) {
+        next unless defined $message;
+        (my $flat = $message) =~ s/\s*\n\s*/ /g;
+        $flat =~ s/^\s+//;
+        $flat =~ s/\s+$//;
+        push(@messages, $flat) if length $flat;
+    }
+    # A problem was flagged even if the tool produced no text for it; never
+    # fall back to the "happy" message in that case.
+    $out = @messages
+        ? join('; ', @messages)
+        : "RAID check flagged a problem but captured no details";
+}
+
+# $pci may itself hold several lspci lines.
+$out =~ s/\s*\n\s*/ /g;
+$out =~ s/\s+$//;
+
+print "$out\n";
+exit $result;
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+
+# Overall plugin state, updated by smartctl() and the checks below.
+my $warn;
+my $crit;
+
+my @out;            # problem messages
+my $drives;         # number of drives examined
+my $type;
+my $mdadm;          # number of md arrays found in /proc/mdstat
+my $fullcommand;
+my $message;
+
+my $hostname = `uname -n`;
+chomp $hostname;
+
+# First lspci line mentioning "raid" / "scsi" (PATA entries excluded); these
+# select which drive enumeration runs.  Each is declared exactly once here.
+my $pci = `lspci | /bin/grep -i raid | /bin/grep -v PATA | /usr/bin/head -1`;
+my $scsi = `lspci | /bin/grep -i scsi | /bin/grep -v PATA | /usr/bin/head -1`;
+
+my $smartctl = "/usr/sbin/smartctl";
+
+# Raw attribute values above these thresholds are critical.
+our $realloc = '50';     # reallocated sectors
+our $pend = '1';         # pending sectors
+our $uncorrect = '1';    # uncorrectable sectors
+
+# Hosts with "mira" in their name get a higher reallocated-sector threshold;
+# the other two thresholds are the same everywhere.
+$realloc = '200' if $hostname =~ /mira/i;
+
+unless ( -x $smartctl )
+{
+    $crit = 1;
+    push(@out,"smartmontools package missing or broken");
+}
+
+
+# Run "sudo <command> -a" against one drive and record S.M.A.R.T. problems.
+#
+# Arguments:
+#   $command  - path of the smartctl binary
+#   $raidtype - "none" (-d sat), "mdadm" (-d ata) or "areca" (-d areca,<drive>)
+#   $drive    - block device name without /dev/ (none, mdadm) or the drive
+#               number on the controller (areca)
+#   $scsidev  - device node passed to smartctl for areca; unused otherwise
+#
+# Side effects: sets $crit and appends a message to @out for every attribute
+# reported as FAILING_NOW and for every reallocated / pending / uncorrectable
+# sector count above $realloc / $pend / $uncorrect.  No return value.
+sub smartctl
+{
+    my ($command, $raidtype, $drive, $scsidev) = @_;
+
+    # Device arguments by raid type, tested in the order in which a match
+    # took precedence before ("none" over "mdadm" over "areca").
+    my @device_args;
+    if ( $raidtype =~ /none/i )
+    {
+        @device_args = ('-d', 'sat', "/dev/$drive");
+    }
+    elsif ( $raidtype =~ /mdadm/i )
+    {
+        @device_args = ('-d', 'ata', "/dev/$drive");
+    }
+    elsif ( $raidtype =~ /areca/i )
+    {
+        @device_args = ('-d', "areca,$drive", $scsidev);
+    }
+    else
+    {
+        # Never reuse the command of a previous call for an unknown type.
+        $crit = 1;
+        push(@out,"Drive $drive: unknown raid type '$raidtype'");
+        return;
+    }
+
+    # List-form pipe open: no shell is involved.  A drive that cannot be
+    # examined is reported instead of silently counted as healthy.
+    my $smart;
+    unless ( open($smart, '-|', 'sudo', $command, '-a', @device_args) )
+    {
+        $crit = 1;
+        push(@out,"Drive $drive: can't run $command: $!");
+        return;
+    }
+
+    my %limit_for = (
+        reallocated => $realloc,
+        pending     => $pend,
+        uncorrect   => $uncorrect,
+    );
+
+    while (my $line = <$smart>)
+    {
+        my @field = split ' ', $line;
+
+        if ( $line =~ /FAILING_NOW/ )
+        {
+            my $attribute = defined $field[1] ? $field[1] : '';
+            $crit = 1;
+            push(@out,"Drive $drive is S.M.A.R.T. failing for $attribute");
+        }
+
+        next unless $line =~ /_sector/i || $line =~ /d_uncorrect/i;
+
+        # Classify by attribute name (second column).  The kind is decided
+        # per line, so an unrecognised attribute is skipped rather than
+        # judged against the previous line's kind.
+        my $name = defined $field[1] ? $field[1] : '';
+        my $kind = $name =~ /d_uncorrect/i ? 'uncorrect'
+                 : $name =~ /pending/i     ? 'pending'
+                 : $name =~ /reallocated/i ? 'reallocated'
+                 :                           undef;
+        next unless defined $kind;
+
+        # The raw value is the tenth column of the attribute table.
+        my $count = $field[9];
+        next unless defined $count && $count =~ /^(\d+)/;
+
+        if ( $1 > $limit_for{$kind} )
+        {
+            $crit = 1;
+            push(@out,"Drive $drive has $count $kind sectors");
+        }
+    }
+    close($smart);
+    return;
+}
+
+# Return the whole-disk names sda .. sdz listed in /proc/partitions, in file
+# order.  The file is read directly, so no shell sees the "sd[a-z]" pattern
+# (unquoted on a command line it would be open to filename expansion).
+# Returns an empty list when the file cannot be read.
+sub whole_disks
+{
+    my @disks;
+    open(my $partitions, '<', '/proc/partitions') or return @disks;
+    while (my $line = <$partitions>)
+    {
+        my @field = split ' ', $line;
+        # The fourth column is the device name; partitions (sda1, ...) are excluded.
+        push(@disks, $field[3]) if defined $field[3] && $field[3] =~ /^sd[a-z]$/;
+    }
+    close($partitions);
+    return @disks;
+}
+
+#1068 IT controller OR Intel SAS.
+if ( $scsi =~ /SAS1068E/i || $scsi =~ /Patsburg/i )
+{
+    for my $blockdevice (whole_disks())
+    {
+        $drives++;
+        smartctl("$smartctl","none",$blockdevice,"none");
+    }
+}
+
+
+# software raid!
+# When /proc/mdstat lists at least one md array, every whole disk is checked
+# with the "mdadm" device type.
+if (-e "/proc/mdstat")
+{
+    if ( open(my $mdstat, '<', "/proc/mdstat") )
+    {
+        while (my $line = <$mdstat>)
+        {
+            $mdadm++ if $line =~ /^(md\d+) : (\w+)/;
+        }
+        close($mdstat);
+    }
+    # Numeric comparison: $mdadm is a count.
+    if ( defined $mdadm && $mdadm > 0 )
+    {
+        for my $blockdevice (whole_disks())
+        {
+            $drives++;
+            smartctl("$smartctl","mdadm",$blockdevice,"none");
+        }
+    }
+}
+
+#areca hardware raid
+# Runs when the lspci RAID line mentions Areca.  A controller firmware
+# value below 2011 is critical.  The SCSI generic node handed to smartctl is
+# /dev/sg<N>, where N is the number of "cli64 vsf info" lines left after
+# filtering.  Each remaining "cli64 disk info" row starting with two spaces
+# and a number names one drive to check.
+if ( $pci =~ /areca/i)
+{
+    my $firmware = `sudo /usr/sbin/cli64 sys info | grep -i firm | awk '{print \$5}' | cut -d'-' -f1`;
+    chomp $firmware;
+
+    if ( $firmware < 2011 )
+    {
+        $crit = 1;
+        $message = "Controller needs newer firmware for S.M.A.R.T. support";
+        push(@out,$message);
+    }
+
+    my $vsf= `sudo /usr/sbin/cli64 vsf info | grep -v Capacity | grep -v ======== | grep -v ErrMsg | wc -l`;
+    chomp $vsf;
+    my $scsidev = "/dev/sg$vsf";
+
+    open(my $disks, "sudo /usr/sbin/cli64 disk info | grep -vi Modelname | grep -v ====== | grep -vi GuiErr | grep -vi Free | grep -vi Failed | grep -vi 'N.A.' |");
+    while (my $row = <$disks>)
+    {
+        # Every line that survives the filters is counted.
+        $drives++;
+        next unless $row =~ /^\ \ [0-9]+/;
+        # The row starts with blanks, so field 0 is empty and field 1 is the drive number.
+        my $drive = (split(/\s+/, $row))[1];
+        smartctl("$smartctl","areca",$drive,$scsidev);
+    }
+}
+
+# show results
+# Exit status: 2 when any check was critical, else 1 on warning, else 0.
+my $result = $crit ? 2
+           : $warn ? 1
+           :         0;
+
+# Status line: the collected problem messages if there are any, otherwise a
+# summary of how many drives were examined.
+my $out = @out    ? join('; ', @out)
+        : $drives ? "All $drives drives happy as clams"
+        :           "No real disks found on machine";
+
+print "$out\n";
+exit $result;
name: ubuntu
group: kvm
+- name: Upload megacli and cli64 (used by the raid and smart monitoring scripts) to /usr/sbin/.
+ copy:
+ src: "../files/sbin/{{ item }}"
+ dest: "/usr/sbin/{{ item }}"
+ owner: root
+ group: root
+ mode: 0755
+ with_items:
+ - megacli
+ - cli64
+
+- name: Create /usr/libexec.
+ file:
+ path: /usr/libexec
+ owner: root
+ group: root
+ mode: 0755
+ state: directory
+
+- name: Upload custom netsaint scripts for raid/disk/smart monitoring to /usr/libexec/.
+ copy:
+ src: "../files/libexec/{{ item }}"
+ dest: "/usr/libexec/{{ item }}"
+ owner: root
+ group: root
+ mode: 0755
+ with_items:
+ - smart.pl
+ - raid.pl
+ - diskusage.pl
+
- name: Include version specific tasks.
include: redhat/rhel_7.0.yml
when: ansible_distribution_version == "7.0"