From: David Galloway <dgallowa@redhat.com>
Date: Mon, 11 Jul 2016 22:43:38 +0000 (-0400)
Subject: Move NRPE setup to common role
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ae480540cfeb4bac66a9cfe53dec365fd753a8f7;p=ceph-cm-ansible.git

Move NRPE setup to common role

Signed-off-by: David Galloway <dgallowa@redhat.com>
---

diff --git a/roles/common/files/libexec/diskusage.pl b/roles/common/files/libexec/diskusage.pl
new file mode 100644
index 0000000..49200da
--- /dev/null
+++ b/roles/common/files/libexec/diskusage.pl
@@ -0,0 +1,123 @@
+#!/usr/bin/perl
+
+# {{ ansible_managed }}
+
+#******************************************************************************************
+#
+# NRPE DISK USAGE PLUGIN
+#
+# Program: Disk Usage plugin written to be used with Netsaint and NRPE
+# License: GPL
+# Copyright (c) 2000 Jeremy Hanmer (jeremy@newdream.net)
+#
+# Last Modified: 10/23/00
+# 
+# Information:  Basically, I wrote this because I had to deal with large numbers of 
+# machines with a wide range of disk configurations, and with dynamically mounted 
+# partitions.  The basic check_disk plugin relied on a static configuration file which
+# doesn't lend itself to being used in a heterogeneous environment (especially when
+# you can't guarantee that the devices listed in the configuration file will be mounted).
+#
+# Bugs:  Currently, this plugin only works on EXT2 partitions (although it's easy to change).
+#
+# Command Line: diskusage.pl <warning percentage> <critical percentage>
+#
+# Tested Systems:  Mandrake 7.1/Intel, Debian 2.2/Intel, Debian 2.1/Intel
+#
+# License Information:
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+#*******************************************************************************************
+
+
+use strict;
+
+my $wrn = shift @ARGV;
+my $crt = shift @ARGV;
+my $output;
+my $count;
+my %type;
+my $result = 0;
+my $warn = 0;
+my $crit = 0;
+my @parts;
+my $hostname = `hostname`;
+chomp $hostname;
+@parts = `mount | grep -vi fuse`;
+
+#if ( $hostname eq 'zartan' ) {
+#	@parts = `mount`;
+#}
+#else {
+#	@parts = `mount -t ext2,reiserfs`;
+#}
+for (@parts) {
+	my ($dev,$on,$mount,$tp,$type,$options) = split(/\s+/,$_);
+		next if ($type eq 'nfs' && !($hostname eq 'zartan'));
+		next if ($type eq 'proc' || $type eq 'devpts');
+		my @df= `df -k $mount`;
+		my @df_inode = `df -i $mount`;
+#		print "$dev $mount $type\n";
+		shift @df;
+		shift @df_inode;
+		for(@df) {
+			my ($dev1,$blocks,$used,$free,$pc,$mount) = split(/\s+/,$_);
+			my ($percent,$blah) = split(/\%/,$pc);
+			if ( ($percent >= $wrn ) && (!($percent >= $crt) || ($mount =~ m/\/mnt\//)) ) {
+				$output .= "$mount is at $pc    ";
+				$warn = 1;
+			}
+			if ( ($percent >= $crt ) && !($mount =~ m/\/mnt\//) ){
+				$output = "" unless $crit eq '1';
+				$output .= "$mount is at $pc    ";
+				$crit = 1;
+			}
+		}
+		for(@df_inode) {
+			my ($dev1,$inodes,$used,$free,$pc,$mount) = split(/\s+/,$_);
+			my ($percent,$blah) = split(/\%/,$pc);
+			if ( ($percent >= $wrn ) && (!($percent >= $crt) ) ) {
+				$output .= "$mount is at $pc inode usage    ";
+				$warn = 1;
+			}
+			if ( ($percent >= $crt ) && !($mount =~ m/\/mnt\//) ){
+				$output = "" unless $crit eq '1';
+				$output .= "$mount is at $pc inode usage    ";
+				$crit = 1;
+			}
+		}
+	}
+
+
+#if ( ($warn eq '1') && !($crit eq '1') )  {
+#	print "$output\n";
+#	$result = 1;
+#	}
+if ( $crit eq '1' ) {
+	print "$output\n";
+	$result = 2;
+}
+
+else {
+	print "Disks are OK now\n";
+}
+
+
+#if ( !( $crit eq '1' ) && !( $warn eq '1' ) ) {
+#	print "Disks are ok now\n";
+#}
+#print "$result\n";
+exit $result; 
diff --git a/roles/common/files/libexec/raid.pl b/roles/common/files/libexec/raid.pl
new file mode 100755
index 0000000..f65eedd
--- /dev/null
+++ b/roles/common/files/libexec/raid.pl
@@ -0,0 +1,313 @@
+#!/usr/bin/perl
+
+# {{ ansible_managed }}
+
+use strict;
+
+my $warn;
+my $crit;
+my $out;
+
+my @out;
+my $devices;
+my $pci;
+my $scsi;
+my $derp;
+
+$pci = `/usr/bin/lspci | /bin/grep -i raid | /bin/grep -v PATA | /usr/bin/head -2`;
+$scsi = `/usr/bin/lspci | /bin/grep -i scsi | /bin/grep -v PATA | /usr/bin/head -1`;
+
+# software raid!
+if (-e "/proc/mdstat") {
+    # Parse /proc/mdstat: each "mdN : <state>" line is expected to be followed
+    # by a "... [n/m] [UU_]" line and, while rebuilding, by a progress line.
+    open(R,"/proc/mdstat") or warn "can't open /proc/mdstat: $!\n";
+    while (<R>) {
+		if (/^(md\d+) : (\w+)/) {
+			my $dev = $1;
+			my $status = $2;
+			my $rest = <R>;
+			$devices++;
+			# Declare unconditionally: "my ... if COND" may keep the previous array's text.
+			my ($disks,$states) = $rest =~ /(\[.*\]) (\[.*\])/;
+			my $mout = ($states =~ /_/) ? "$dev is $status $disks $states" : '';
+
+			# recovery?
+			my $next = <R>;  # possibly recovery?
+			if ($next =~ / recovery = /) {
+				my ($progress,$per) = $next =~ /(\[.*\])\s+recovery =\s+(\S+%)/;
+				$mout .= " recovery $per";
+				# finish=/speed= are read from the progress line itself, as in the resync branch.
+				if (my ($finish,$speed) = $next =~ /finish=(.*)min speed=(.*)\/sec/) {
+					$mout .= " finish $finish min";
+				}
+				$warn = 1;
+			} elsif ($next =~ / resync = /) {
+				my ($progress,$per) = $next =~ /(\[.*\])\s+resync =\s+(\S+%)/;
+				$mout .= " resync $per";
+				if (my ($finish,$speed) = $next =~ /finish=(.*)min speed=(.*)\/sec/) {
+					$mout .= " finish $finish min";
+				}
+				$warn = 1;
+			} elsif ($states =~ /_/) {  # not all U
+				$crit = 1;
+			}
+			push( @out, $mout ) if $mout;
+		}
+    }
+    close(R);
+}
+
+
+# mylex raid!
+if ($pci =~ /Mylex/i) {
+#if (1) {
+    my $s = `cat /proc/rd/status`;
+    chomp($s);
+    unless ($s =~ /OK/) {
+	my @myinfo;
+	for my $ctl (`ls -d /proc/rd/c*`) {
+#	for my $ctl ('/proc/rd/c0') {
+	    chomp $ctl;
+	    my %bad;
+	    my ($c) = $ctl =~ /\/(c\d)$/;
+	    open(S,"$ctl/current_status") || print "can't open $ctl/current_status\n";;
+#	    open(S,"/tmp/mylex.bad");
+	    my $lastdevice;
+	    while (<S>) {
+		# disk status
+		if (/^    (\d:\d)  Vendor/) {
+		    $lastdevice = $1;
+		}
+		if (/ Disk Status: (\S+),/) {
+		    if ($1 ne 'Online') {
+			push( @myinfo, "$c disk $lastdevice $1");
+		    }
+		}
+
+		# logical drives
+		if (/    (\/dev\/rd\/\S+): (\S+), (\w+),/) {
+		    my $dev = $1;
+		    my $type = $2;
+		    my $status = $3;
+		    $devices++;
+		    $bad{$dev} = 1;
+		    if ($status ne 'Online') {
+			push( @myinfo, "$dev ($type) $status");
+		    }
+		}
+
+		# rebuild?
+		if (/  Rebuild in Progress: .* \((\S+)\) (\d+%) completed/) {
+		    push( @myinfo, "$1 rebuild $2 complete" );
+		    delete $bad{$1};
+		}
+	    }
+	    if (keys %bad) {
+		$crit = 1;  # at least 1 is failed and !recovering
+	    } else {
+		$warn = 1;   # all are recovering
+	    }
+	}
+
+	push( @out, "Mylex $s: " . join(', ',@myinfo)) if @myinfo;
+    }
+}
+
+
+# icp vortex raid!
+if ( $pci =~ /intel/i) {
+    opendir(D,"/proc/scsi/gdth");
+    my @dev = readdir(D);
+    closedir D;
+    my @vortex;
+    for my $dev (@dev) {
+	next if $dev =~ /^\./;
+	my $read = `cat /proc/scsi/gdth/$dev`;
+	# my $read = `cat /tmp/asdf9.warn`;
+	my $cur;   # Logical | Physical | Host | Array
+	my @myinfo;
+#	print "dev $dev\n";
+	for $_ (split(/\n/,$read)) {
+	    chomp;
+	    if (/^\w/) {
+		# new section
+		($cur) = /^(\w+)/;
+#		print "cur = $cur\n";
+		next;
+	    }
+	    if ($cur eq 'Logical') {
+		my ($num,$status) = /Number:\s+(\d+)\s+Status:\s+(\w+)/;
+		next unless $status;
+		if ($status ne 'ok') {
+		    $warn = 1;
+		    #push( @myinfo, "Logical #$num $status" );
+		    unshift( @myinfo, "Logical #$num $status" );
+		}
+	    }
+	    if ($cur eq 'Array') {
+		my ($num,$status) = /Number:\s+(\d+)\s+Status:\s+(\w+)/;
+		next unless $status;
+		if ($status ne 'ready') {
+		    $warn = 1;
+		    #push( @myinfo, "Array #$num $status" );
+		    unshift( @myinfo, "Array #$num $status" );
+		}
+	    }
+	    if ($cur eq 'Host') {
+		if (/Number/) {
+		    $devices++;
+		}
+	    }
+	    if ($cur eq 'Controller') {
+		# push( @myinfo, $_ );
+		unshift( @myinfo, $_ );
+	    }
+	}
+	
+	if (@myinfo) {
+	    # push( @vortex, "dev $dev: " . join(', ', @myinfo) );
+	    # unshift( @vortex, "dev $dev: " . join(', ', @myinfo) );
+	    push( @vortex, "dev $dev: " . join(', ', $myinfo[0], $myinfo[1], $myinfo[2], $myinfo[3], $myinfo[4] ) );
+	    # $warn = 1;
+	}
+    }
+
+    if (@vortex) {
+	# push( @out, 'Vortex: ' . join('.   ', @vortex) );
+	push( @out, 'Vortex: ' . join('.   ', @vortex) );
+    }
+}
+# SAS megaraid
+if ( $pci =~ /LSI\ Logic/i) {
+    my $read = `/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0`;
+    for $_ (split(/\n/,$read)) {
+    	chomp;
+	# The line we care about is State: Optimal, if we don't have that, we've problems
+	if ($_ =~/^State\s*\:\s*(.*)/m) {
+            $devices++;
+	    # $1 is everything that follows "State:" on this line
+	    my $state = $1;
+	    next unless $state;
+	    if ($state ne 'Optimal') {
+		my $rebuild = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -i firmware`;
+			if ( $rebuild =~ /Rebuild/i) {
+				my $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
+				#my $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/grep -i rebuild`;
+				my $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/egrep -i \'\(rebuild\|not found\)\'`;
+				if ($rebuildstatus =~ /not found/m) {
+				   # check by device id instead of enclosure id if we get a not found error above
+				   $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /bin/grep -v position | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
+				   $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/grep -i rebuild`;
+				}
+					for $_ ($rebuildstatus) {
+					$crit = 1;
+					push(@out,$_);
+					}
+			} else {
+	        $crit = 1;
+                my $virtual=`/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0 | grep -i failed -B6 | grep -i virtual | cut -d'(' -f1`;
+		push(@out, $virtual, $_);
+		}
+	    }
+	}	
+	# Also catch the syntax or permission errors that megacli itself reports
+	# (any line containing ERROR, in any case).
+	if (/ERROR/i) {
+	    $crit = 1;
+	    # Only queue the line: everything in @out is printed once, joined
+	    # into the summary line, at the end of the script.  Printing here
+	    # as well would put newline-less text in front of that summary.
+	    push(@out, $_);
+	}
+    }
+}
+
+# 3ware
+if ( $pci =~ /3ware/i) {
+	open(CLI,"/usr/bin/sudo /usr/sbin/tw_cli show|");
+	#my $read = `/usr/sbin/megacli -LDInfo -l0 -a0`;
+
+	$devices++;
+	my @controllers;
+	while (<CLI>) {
+		if ( $_ =~ /^c[0-9]/ ) {
+			my ($c) = split(/\s+/,$_);
+			push(@controllers,$c);
+		}
+	}
+	close(CLI);
+
+	foreach my $cont (@controllers) {
+		open(CLI,"/usr/bin/sudo /usr/sbin/tw_cli /$cont show|");
+		while (<CLI>) {
+			if ( $_ =~ /^u[0-9]+/ ) {
+				my @info = split(/\s+/,$_);
+				if ( $info[2] ne 'OK' ) {
+					if ( $info[2] =~ /REBUILDING/i) {
+						my $rebuildstatus = `/usr/bin/sudo /usr/sbin/tw_cli /$cont/$info[0] show | /bin/grep REBUILD | /bin/grep -v RAID-10`;
+							for $_ ($rebuildstatus) {
+							$crit = 1;
+							push(@out,$_);
+							}
+					} else {
+					$crit = 1;
+					push(@out,$_);
+					}
+				}
+			}
+			if ( $_ =~ /^p[0-9]+/ ) {
+				my @info = split(/\s+/,$_);
+				if ( $info[1] ne 'OK' ) {
+					$crit = 1;
+					push(@out,$_);
+				}
+			}
+		}
+	}	
+}
+
+#Areca
+
+if ( $pci =~ /areca/i) {
+                open(CLI,"sudo /usr/sbin/cli64 vsf info|");
+                while (<CLI>) {
+                        if ( $_ =~ /^\ \ [0-9]+/ ) {
+				$devices++;
+                                my @info = split(/\s+/,$_);
+				if ( $_ !~ /Normal/i) {
+                                        $crit = 1;
+                                        push(@out,$_);
+                                }
+                        }
+                }
+        }
+
+if ( $scsi =~ /LSI Logic/i) {
+                open(CLI,"sudo /usr/sbin/mpt-status | /usr/bin/head -1 |");
+                $devices++;
+                while (<CLI>) {
+                        if ( $_ =~ /^ioc/ ) {
+                                my @info = split(/\s+/,$_);
+                                if ( $info[10] ne 'OPTIMAL,' ) {
+                                        $crit = 1;
+                                        push(@out,$_);
+                                }
+                        }
+                }
+        }
+
+# show results (exit status: 0 = OK, 1 = warning, 2 = critical)
+my $result = 0;
+$result = 1 if $warn;
+$result = 2 if $crit;
+# print "warn = $warn crit = $crit\n";
+# $out is already declared at the top of the script, so assign rather than re-declare it.
+$out = "No raid devices found $pci";
+$out = "All $devices raid devices happy as clams" if $devices;
+if (@out) {
+    $out = join(';     ', @out);
+}
+
+print "$out\n";
+exit $result;
diff --git a/roles/common/files/libexec/smart.sh b/roles/common/files/libexec/smart.sh
new file mode 100755
index 0000000..2f71a60
--- /dev/null
+++ b/roles/common/files/libexec/smart.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# Description:	Bash script to check drive health using pending, uncorrectable,
+# and reallocated sector count
+#
+# Nagios return codes: 0 = OK; 1 = WARNING; 2 = CRITICAL; 3 = UNKNOWN
+# SMART Attribute Codes:
+#   5 = Reallocated
+#   187 = Reported Uncorrect
+#   197 = Pending
+#   198 = Uncorrectable Sector Count
+#
+# TO-DO: Add support for dynamic SMART attribute lookup.  For example,
+#        187 is reported for Seagate HDD and all SSDs but not Hitachi HDDs.
+#
+# See https://en.wikipedia.org/wiki/S.M.A.R.T.#ATA_S.M.A.R.T._attributes
+
+### Define global variables ###
+# total number of drives (or RAID slots) discovered
+numdrives=0
+# Number of failed, failing, and/or missing drives
+failingdrives=0
+# Fallback message for UNKNOWN return code output
+unknownmsg="Unknown error"
+# Return code for nagios (Default to SUCCESS)
+rc=0
+# Array of messages indicating drive health.  Output after nagios status.
+declare -a messages
+
+### Functions ###
+main ()
+{
+  preflight
+
+  if [ "$raid" = true ]
+  then
+    areca_smart
+    areca_failed
+  elif [ "$raid" = false ]
+  then
+    normal_smart
+  else
+    echo "ERROR - Could not determine if RAID present"
+    exit 3
+  fi
+
+  ## Return UNKNOWN if no drives found
+  if [ "$numdrives" -eq "0" ]
+  then
+    unknownmsg="No drives found!"
+    rc=3
+  fi
+  
+  ## Return code and service status for nagios
+  if [ "$rc" = 0 ]
+  then
+    echo "OK - All $numdrives drives healthy"
+  elif [ "$rc" = 1 ]
+  then
+    echo "WARNING - $failingdrives of $numdrives drives sick"
+  elif [ "$rc" = 2 ]
+  then
+    echo "CRITICAL - $failingdrives of $numdrives drives need replacing"
+  elif [ "$rc" = 3 ]
+  then
+    echo "UNKNOWN - $unknownmsg"
+  else
+    echo "ERROR - Got no return code"
+  fi
+  
+  ## Iterate through array of messages
+  # Nagios reads and displays the first line of output on the Services page.
+  # All individual messages about failed/failing disk statistics can be viewed
+  # on the individual system's SMART detail page in nagios.
+  for msg in "${messages[@]}"
+  do
+    echo "$msg"
+  done
+  
+  exit $rc
+}
+
+# Pre-flight checks
+preflight ()
+{
+  # Set raid var then check for cli64 command and bail if missing
+  if lspci | grep -qi areca
+  then
+    raid=true
+  else
+    raid=false
+  fi
+  
+  if [ "$raid" = true ] && ! [ -x "$(command -v cli64)" ]
+  then
+    echo "ERROR - cli64 command not found or is not executable"
+    exit 3
+  fi
+  
+  # Check for smartmontools and bail if missing
+  if ! [ -x "$(command -v smartctl)" ]
+  then
+    echo "ERROR - smartctl is not installed or is not executable"
+    echo "yum/apt-get install smartmontools"
+    exit 3
+  fi
+}
+
+# Gather smart data for drives behind Areca RAID controller
+areca_smart ()
+{
+  # Store output of cli64 to reduce repeated executions
+  cli64out=$(sudo cli64 disk info | grep -E "Slot#[[:digit:]]")
+  numdrives=$(echo "$cli64out" | wc -l)
+  # Loop through all disks not marked as 'N.A.' or 'Failed'
+  for slot in $(echo "$cli64out" | grep -v 'N.A.\|Failed' \
+  | grep -o "Slot#[[:digit:]]" | cut -c6-)
+  do
+    failed=false
+    # Determine if disk is JBOD or part of hardware RAID
+    if echo "$cli64out" | grep -E "Slot#$slot" | grep -q 'JBOD'
+    then
+      jbod=true
+    else
+      jbod=false
+    fi
+    output=$(sudo cli64 disk smart drv=$slot \
+    | grep -E "^  "5"|^"197"|^"198"" | awk '{ print $(NF-1) }' | tr '\n' ' ')
+    outputcount=$(echo $output | wc -w)
+    # Only continue if we received 3 SMART data points
+    if [ "$outputcount" = "3" ]
+    then
+      # Only do slot to drive letter matching once per bad JBOD
+      if [[ $output != "0 0 0 " ]] && [ "$jbod" = true ]
+      then
+        dl=$(areca_bay_to_letter $slot)
+      elif [ "$jbod" = false ]
+      then
+        dl="(RAID)"
+      fi
+      read reallocated pending uncorrect <<< $output
+      if [ "$reallocated" != "0" ]
+      then
+        messages+=("Drive $slot $dl has $reallocated reallocated sectors")
+        failed=true
+        # A small number of reallocated sectors is OK
+        if [ "$reallocated" -le 5 ]
+        then
+          rc=1 # Warn if <= 5
+        else
+          rc=2 # Crit if >5
+        fi
+      fi
+      if [ "$pending" != "0" ]
+      then
+        messages+=("Drive $slot $dl has $pending pending sectors")
+        failed=true
+        rc=2
+      fi
+      if [ "$uncorrect" != "0" ]
+      then
+        messages+=("Drive $slot $dl has $uncorrect uncorrect sectors")
+        failed=true
+        rc=2
+      fi
+    else
+      messages+=("Drive $slot returned $outputcount of 3 expected attributes")
+      unknownmsg="SMART data could not be read for one or more drives"
+      rc=3
+    fi
+    # Make sure drives with multiple types of bad sectors only get counted once
+    if [ "$failed" = true ]
+    then
+      let "failingdrives+=1"
+    fi
+  done
+}
+
+# Correlate Areca drive bay to drive letter
+areca_bay_to_letter ()
+{
+  # Get S/N according to RAID controller given argument $1 (slot #)
+  areca_serial=$(sudo cli64 disk info drv=$1 | grep 'Serial Number' \
+  | awk '{ print $NF }')
+  # Loop through and get S/N according to smartctl given drive name
+  for dl in $(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' \
+  | awk '{ print $NF }')
+  do
+    smart_serial=$(sudo smartctl -a /dev/$dl | grep "Serial number" \
+    | awk '{ print $NF }')
+    # If cli64 and smartctl find a S/N match, return drive letter
+    if [ "$areca_serial" = "$smart_serial" ]
+    then
+      echo "($dl)"
+    fi
+  done
+}
+
+# Tally missing and failed drives connected to Areca RAID
+areca_failed ()
+{
+  # Store output of cli64 to reduce repeated executions
+  cli64out=$(sudo cli64 disk info | grep -E "Slot#[[:digit:]]")
+  # Missing (N.A.) drives
+  for drive in $(echo "$cli64out" | grep -E "Slot#[[:digit:]]" \
+  | grep "N.A." | awk '{ print $1 }')
+  do
+    messages+=("Drive $drive is missing")
+    let "failingdrives+=1"
+    rc=2
+  done
+  # Hard failed drives
+  for drive in $(echo "$cli64out" | grep -E "Slot#[[:digit:]]" \
+  | grep 'Failed' | awk '{ print $1 }')
+  do
+    messages+=("Drive $drive failed")
+    let "failingdrives+=1"
+    rc=2
+  done
+}
+
+# Standard SATA/SAS drive smartctl check
+normal_smart ()
+{
+  # The grep regex will include drives named sdaa, for example
+  numdrives=$(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' | wc -l)
+  for l in $(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' \
+  | awk '{ print $NF }')
+  do
+    failed=false
+    output=$(sudo smartctl -a /dev/$l | grep -E "^  "5"|^"197"|^"198"" \
+    | awk '{ print $NF }' | tr '\n' ' ')
+    outputcount=$(echo $output | wc -w)
+    # Check if drive is SSD and set var accordingly
+    if sudo smartctl -i /dev/$l | grep -q 'Solid State Device'; then
+      is_ssd=true
+    else
+      is_ssd=false
+    fi
+    # Only continue if we received 3 SMART data points and drive is not SSD
+    if [ "$outputcount" = "3" ] && [ "$is_ssd" = false ]
+    then
+      read reallocated pending uncorrect <<< $output
+      if [ "$reallocated" != "0" ]
+      then
+        messages+=("Drive $l has $reallocated reallocated sectors")
+        failed=true
+        # A small number of reallocated sectors is OK
+        if [ "$reallocated" -le 5 ]
+        then
+          rc=1 # Warn if <= 5
+        else
+          rc=2 # Crit if >5
+        fi
+      fi
+      if [ "$pending" != "0" ]
+      then
+        messages+=("Drive $l has $pending pending sectors")
+        failed=true
+        rc=2
+      fi
+      if [ "$uncorrect" != "0" ]
+      then
+        messages+=("Drive $l has $uncorrect uncorrect sectors")
+        failed=true
+        rc=2
+      fi
+    elif [ "$outputcount" != "3" ] && [ "$is_ssd" = false ]
+    then
+      messages+=("Drive $l returned $outputcount of 3 expected attributes")
+      unknownmsg="SMART data could not be read for one or more drives"
+      rc=3
+    # Set no return code and assume any SSD is healthy for now
+    elif [ "$is_ssd" = true ]
+    then
+      messages+=("Drive $l is an SSD.  Not yet supported.")
+      rc=0
+    else
+      messages+=("Error processing data for drive $l")
+      rc=3
+    fi
+    # Make sure drives with multiple types of bad sectors only get counted once
+    if [ "$failed" = true ]
+    then
+      let "failingdrives+=1"
+    fi
+  done
+}
+
+## Call main() function
+main
diff --git a/roles/common/files/nagios/nrpe.te b/roles/common/files/nagios/nrpe.te
new file mode 100644
index 0000000..c7bc886
--- /dev/null
+++ b/roles/common/files/nagios/nrpe.te
@@ -0,0 +1,12 @@
+module nrpe 1.0;
+
+require {
+	type fsadm_exec_t;
+	type nrpe_t;
+	type hwdata_t;
+	class file { read getattr open };
+}
+
+#============= nrpe_t ==============
+allow nrpe_t fsadm_exec_t:file getattr;
+allow nrpe_t hwdata_t:file { read getattr open };
diff --git a/roles/common/files/sbin/cli64 b/roles/common/files/sbin/cli64
new file mode 100644
index 0000000..7ef82de
Binary files /dev/null and b/roles/common/files/sbin/cli64 differ
diff --git a/roles/common/files/sbin/megacli b/roles/common/files/sbin/megacli
new file mode 100755
index 0000000..50bf00b
Binary files /dev/null and b/roles/common/files/sbin/megacli differ
diff --git a/roles/common/handlers/main.yml b/roles/common/handlers/main.yml
new file mode 100644
index 0000000..e2563ef
--- /dev/null
+++ b/roles/common/handlers/main.yml
@@ -0,0 +1,5 @@
+---
+- name: restart nagios-nrpe-server
+  service:
+    name: "{{ nrpe_service_name }}"
+    state: restarted
diff --git a/roles/common/tasks/disk_monitoring.yml b/roles/common/tasks/disk_monitoring.yml
new file mode 100644
index 0000000..2d06a17
--- /dev/null
+++ b/roles/common/tasks/disk_monitoring.yml
@@ -0,0 +1,33 @@
+---
+# We use these scripts to check to see if any of our test nodes have bad disks
+
+- name: Upload megacli and cli64 for raid monitoring and smart.pl to /usr/sbin/.
+  copy:
+    src: "../files/sbin/{{ item }}"
+    dest: "/usr/sbin/{{ item }}"
+    owner: root
+    group: root
+    mode: 0755
+  with_items:
+    - megacli
+    - cli64
+
+- name: Create /usr/libexec.
+  file:
+    path: /usr/libexec
+    owner: root
+    group: root
+    mode: 0755
+    state: directory
+
+- name: Upload custom netsaint scripts for raid/disk/smart/monitoring to /usr/libexec/.
+  copy:
+    src: "../files/libexec/{{ item }}"
+    dest: "/usr/libexec/{{ item }}"
+    owner: root
+    group: root
+    mode: 0755
+  with_items:
+    - smart.sh
+    - raid.pl
+    - diskusage.pl
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index bf065e3..204cc68 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -26,3 +26,26 @@
 - include: kerberos.yml
   tags:
     - kerberos
+
+# upload custom disk monitoring scripts
+- include: disk_monitoring.yml
+  tags:
+    - monitoring-scripts
+
+# configure nagios
+- include: nagios.yml
+  tags:
+    - nagios
+
+- name: Get SELinux status
+  command: getenforce
+  register: selinux_status
+  when: ansible_pkg_mgr == "yum"
+  tags:
+    - nagios
+
+# configure selinux for nagios (yum only: a skipped getenforce still registers selinux_status, but without stdout)
+- include: nrpe-selinux.yml
+  when: ansible_pkg_mgr == "yum" and selinux_status.stdout != "Disabled"
+  tags:
+    - nagios
diff --git a/roles/common/tasks/nagios.yml b/roles/common/tasks/nagios.yml
new file mode 100644
index 0000000..259a229
--- /dev/null
+++ b/roles/common/tasks/nagios.yml
@@ -0,0 +1,54 @@
+---
+- name: Upload nagios sudoers.d for raid utilities.
+  template:
+    src: nagios/90-nagios
+    dest: /etc/sudoers.d/90-nagios
+    owner: root
+    group: root
+    mode: 0440
+    validate: visudo -cf %s
+
+- name: Configure nagios nrpe settings (Ubuntu)
+  lineinfile:
+    dest: /etc/default/{{ nrpe_service_name }}
+    regexp: "^DAEMON_OPTS"
+    line: "DAEMON_OPTS=\"--no-ssl\""
+  when: ansible_pkg_mgr == "apt"
+
+- name: Configure nagios nrpe settings (RHEL/CentOS)
+  lineinfile:
+    dest: /etc/sysconfig/{{ nrpe_service_name }}
+    regexp: "^NRPE_SSL_OPT"
+    line: "NRPE_SSL_OPT=\"-n\""
+  when: ansible_pkg_mgr == "yum"
+
+- name: Check firewalld status
+  command: systemctl status firewalld
+  register: firewalld
+  ignore_errors: true
+  no_log: true
+  when: ansible_pkg_mgr == "yum"
+
+- name: Open nrpe port if firewalld enabled
+  firewalld:
+    port: 5666/tcp
+    state: enabled
+    permanent: yes
+    immediate: yes
+  when: ansible_pkg_mgr == "yum" and (firewalld is defined and firewalld.stdout.find('running') != -1)
+
+- name: Upload nagios nrpe config.
+  template:
+    src: nagios/nrpe.cfg 
+    dest: /etc/nagios/nrpe.cfg
+    owner: root
+    group: root
+    mode: 0644
+  notify:
+    - restart nagios-nrpe-server
+
+- name: Make sure nagios nrpe service is running.
+  service:
+    name: "{{ nrpe_service_name }}"
+    enabled: yes
+    state: started
diff --git a/roles/common/tasks/nrpe-selinux.yml b/roles/common/tasks/nrpe-selinux.yml
new file mode 100644
index 0000000..877aa2e
--- /dev/null
+++ b/roles/common/tasks/nrpe-selinux.yml
@@ -0,0 +1,44 @@
+---
+- name: nrpe - Install semanage python bindings
+  yum:
+    pkg: libsemanage-python
+    state: installed
+
+- name: nrpe - Install SELinux tools
+  yum:
+    pkg: policycoreutils-python
+    state: installed
+
+- name: nrpe - Ensure SELinux policy is up to date
+  yum:
+    pkg: selinux-policy-targeted
+    state: latest
+
+- name: nrpe - Set SELinux boolean nagios_run_sudo true
+  seboolean:
+    name: nagios_run_sudo
+    state: yes
+    persistent: yes
+
+- name: nrpe - Remove SELinux policy package
+  command: semodule -r nrpe
+  failed_when: false
+
+- name: nrpe - Copy SELinux type enforcement file
+  copy:
+    src: nagios/nrpe.te
+    dest: /tmp/nrpe.te
+
+- name: nrpe - Compile SELinux module file
+  command: checkmodule -M -m -o /tmp/nrpe.mod /tmp/nrpe.te
+
+- name: nrpe - Build SELinux policy package
+  command: semodule_package -o /tmp/nrpe.pp -m /tmp/nrpe.mod
+
+- name: nrpe - Load SELinux policy package
+  command: semodule -i /tmp/nrpe.pp
+
+# The file module does not expand shell globs, so each temporary file is removed by name.
+- name: nrpe - Remove temporary files
+  file: {path: "/tmp/nrpe.{{ item }}", state: absent}
+  with_items: [te, mod, pp]
diff --git a/roles/common/templates/nagios/90-nagios b/roles/common/templates/nagios/90-nagios
new file mode 100644
index 0000000..34326fb
--- /dev/null
+++ b/roles/common/templates/nagios/90-nagios
@@ -0,0 +1,2 @@
+## {{ ansible_managed }}
+{{ nrpe_user }} ALL=NOPASSWD: /usr/sbin/megacli, /usr/sbin/cli64, /usr/sbin/smartctl, /usr/sbin/smartctl
diff --git a/roles/common/templates/nagios/nrpe.cfg b/roles/common/templates/nagios/nrpe.cfg
new file mode 100644
index 0000000..84435c8
--- /dev/null
+++ b/roles/common/templates/nagios/nrpe.cfg
@@ -0,0 +1,29 @@
+# {{ ansible_managed }}
+log_facility=daemon
+pid_file=/var/run/nagios/nrpe.pid
+server_port=5666
+nrpe_user={{ nrpe_user }}
+nrpe_group={{ nrpe_group }}
+
+# These should eventually be in a secrets group_var
+# 172. address is sepia nagios server
+# 10. address is octo nagios server
+allowed_hosts=127.0.0.1,172.21.0.33,10.8.0.8
+dont_blame_nrpe=0
+debug=0
+command_timeout=60
+connection_timeout=300
+
+command[check_users]={{ nagios_plugins_directory }}/check_users --warning=5 --critical=10
+command[check_load]={{ nagios_plugins_directory }}/check_load --percpu --warning=1.5,1.4,1.3 --critical=2.0,1.9,1.8
+command[check_hda1]={{ nagios_plugins_directory }}/check_disk --warning=20% --critical=10% --partition=/dev/hda1
+command[check_root]={{ nagios_plugins_directory }}/check_disk --warning=10% --critical=5% --units=GB --path=/
+command[check_zombie_procs]={{ nagios_plugins_directory }}/check_procs --warning=5 --critical=10 --state=Z
+command[check_total_procs]={{ nagios_plugins_directory }}/check_procs --warning=300 --critical=500
+command[check_raid]=/usr/libexec/raid.pl
+command[check_disks]=/usr/libexec/diskusage.pl 90 95
+command[check_smart]=/usr/libexec/smart.sh
+
+include=/etc/nagios/nrpe_local.cfg
+
+include_dir=/etc/nagios/nrpe.d/
diff --git a/roles/common/vars/apt_systems.yml b/roles/common/vars/apt_systems.yml
new file mode 100644
index 0000000..066314d
--- /dev/null
+++ b/roles/common/vars/apt_systems.yml
@@ -0,0 +1,5 @@
+---
+nrpe_service_name: nagios-nrpe-server
+nrpe_user: nagios
+nrpe_group: nagios
+nagios_plugins_directory: /usr/lib/nagios/plugins
diff --git a/roles/common/vars/yum_systems.yml b/roles/common/vars/yum_systems.yml
new file mode 100644
index 0000000..d7b4ed2
--- /dev/null
+++ b/roles/common/vars/yum_systems.yml
@@ -0,0 +1,5 @@
+---
+nrpe_service_name: nrpe
+nrpe_user: nrpe
+nrpe_group: nrpe
+nagios_plugins_directory: /usr/lib64/nagios/plugins
diff --git a/roles/testnode/files/libexec/diskusage.pl b/roles/testnode/files/libexec/diskusage.pl
deleted file mode 100644
index 49200da..0000000
--- a/roles/testnode/files/libexec/diskusage.pl
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/perl
-
-# {{ ansible_managed }}
-
-#******************************************************************************************
-#
-# NRPE DISK USAGE PLUGIN
-#
-# Program: Disk Usage plugin written to be used with Netsaint and NRPE
-# License: GPL
-# Copyright (c) 2000 Jeremy Hanmer (jeremy@newdream.net)
-#
-# Last Modified: 10/23/00
-# 
-# Information:  Basically, I wrote this because I had to deal with large numbers of 
-# machines with a wide range of disk configurations, and with dynamically mounted 
-# partitions.  The basic check_disk plugin relied on a static configuration file which
-# doesn't lend itself to being used in a heterogeneous environnment (especially when
-# you can't guarantee that the devices listed in the configuration file will be mounted).
-#
-# Bugs:  Currently, this plugin only works on EXT2 partitions (although it's easy to change).
-#
-# Command Line: diskusage.pl <warning percentage> <critical percentage>
-#
-# Tested Systems:  Mandrake 7.1/Intel, Debian 2.2/Intel, Debian 2.1/Intel
-#
-# License Information:
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-#
-#*******************************************************************************************
-
-
-use strict;
-
-my $wrn = shift @ARGV;
-my $crt = shift @ARGV;
-my $output;
-my $count;
-my %type;
-my $result = 0;
-my $warn = 0;
-my $crit = 0;
-my @parts;
-my $hostname = `hostname`;
-chomp $hostname;
-@parts = `mount | grep -vi fuse`;
-
-#if ( $hostname eq 'zartan' ) {
-#	@parts = `mount`;
-#}
-#else {
-#	@parts = `mount -t ext2,reiserfs`;
-#}
-for (@parts) {
-	my ($dev,$on,$mount,$tp,$type,$options) = split(/\s+/,$_);
-		next if ($type eq 'nfs' && !($hostname eq 'zartan'));
-		next if ($type eq 'proc' || $type eq 'devpts');
-		my @df= `df -k $mount`;
-		my @df_inode = `df -i $mount`;
-#		print "$dev $mount $type\n";
-		shift @df;
-		shift @df_inode;
-		for(@df) {
-			my ($dev1,$blocks,$used,$free,$pc,$mount) = split(/\s+/,$_);
-			my ($percent,$blah) = split(/\%/,$pc);
-			if ( ($percent >= $wrn ) && (!($percent >= $crt) || ($mount =~ m/\/mnt\//)) ) {
-				$output .= "$mount is at $pc    ";
-				$warn = 1;
-			}
-			if ( ($percent >= $crt ) && !($mount =~ m/\/mnt\//) ){
-				$output = "" unless $crit eq '1';
-				$output .= "$mount is at $pc    ";
-				$crit = 1;
-			}
-		}
-		for(@df_inode) {
-			my ($dev1,$inodes,$used,$free,$pc,$mount) = split(/\s+/,$_);
-			my ($percent,$blah) = split(/\%/,$pc);
-			if ( ($percent >= $wrn ) && (!($percent >= $crt) ) ) {
-				$output .= "$mount is at $pc inode usage    ";
-				$warn = 1;
-			}
-			if ( ($percent >= $crt ) && !($mount =~ m/\/mnt\//) ){
-				$output = "" unless $crit eq '1';
-				$output .= "$mount is at $pc inode usage    ";
-				$crit = 1;
-			}
-		}
-	}
-
-
-#if ( ($warn eq '1') && !($crit eq '1') )  {
-#	print "$output\n";
-#	$result = 1;
-#	}
-if ( $crit eq '1' ) {
-	print "$output\n";
-	$result = 2;
-}
-
-else {
-	print "Disks are OK now\n";
-}
-
-
-#if ( !( $crit eq '1' ) && !( $warn eq '1' ) ) {
-#	print "Disks are ok now\n";
-#}
-#print "$result\n";
-exit $result; 
diff --git a/roles/testnode/files/libexec/raid.pl b/roles/testnode/files/libexec/raid.pl
deleted file mode 100755
index f65eedd..0000000
--- a/roles/testnode/files/libexec/raid.pl
+++ /dev/null
@@ -1,313 +0,0 @@
-#!/usr/bin/perl
-
-# {{ ansible_managed }}
-
-use strict;
-
-my $warn;
-my $crit;
-my $out;
-
-my @out;
-my $devices;
-my $pci;
-my $scsi;
-my $derp;
-
-$pci = `/usr/bin/lspci | /bin/grep -i raid | /bin/grep -v PATA | /usr/bin/head -2`;
-$scsi = `/usr/bin/lspci | /bin/grep -i scsi | /bin/grep -v PATA | /usr/bin/head -1`;
-
-# software raid!
-if (-e "/proc/mdstat") {
-    # check software raid!
-#    open(R,"/tmp/mdstat");
-    open(R,"/proc/mdstat");
-    while (<R>) {
-		if (/^(md\d+) : (\w+)/) {
-			my $dev = $1;
-			my $status = $2;
-			my $rest = <R>;
-			$devices++;
-			
-			my ($disks,$states) = $rest =~ /(\[.*\]) (\[.*\])/;
-			my $mout .= "$dev is $status $disks $states" if $states =~ /_/;
-			
-			# recovery?
-			my $next = <R>;  # possibly recovery?
-			if ($next =~ / recovery = /) {
-				my ($progress,$per) = $next =~ /(\[.*\])\s+recovery =\s+(\S+%)/;
-				$mout .= " recovery $per";
-				my $next = <R>;
-				if (my ($finish,$speed) = $next =~ /finish=(.*)min speed=(.*)\/sec/) {
-					$mout .= " finish $finish min";
-				}
-				$warn = 1;
-            } elsif ($next =~ / resync = /) {
-                my ($progress,$per) = $next =~ /(\[.*\])\s+resync =\s+(\S+%)/;
-                $mout .= " resync $per";
-                if (my ($finish,$speed) = $next =~ /finish=(.*)min speed=(.*)\/sec/) {
-                    $mout .= " finish $finish min";
-                }
-                $warn = 1;
-			} elsif ($states =~ /_/) {  # not all U
-				$crit = 1;
-			}
-			
-			push( @out, $mout ) if $mout;
-		}
-    }
-}
-
-
-# mylex raid!
-if ($pci =~ /Mylex/i) {
-#if (1) {
-    my $s = `cat /proc/rd/status`;
-    chomp($s);
-    unless ($s =~ /OK/) {
-	my @myinfo;
-	for my $ctl (`ls -d /proc/rd/c*`) {
-#	for my $ctl ('/proc/rd/c0') {
-	    chomp $ctl;
-	    my %bad;
-	    my ($c) = $ctl =~ /\/(c\d)$/;
-	    open(S,"$ctl/current_status") || print "can't open $ctl/current_status\n";;
-#	    open(S,"/tmp/mylex.bad");
-	    my $lastdevice;
-	    while (<S>) {
-		# disk status
-		if (/^    (\d:\d)  Vendor/) {
-		    $lastdevice = $1;
-		}
-		if (/ Disk Status: (\S+),/) {
-		    if ($1 ne 'Online') {
-			push( @myinfo, "$c disk $lastdevice $1");
-		    }
-		}
-
-		# logical drives
-		if (/    (\/dev\/rd\/\S+): (\S+), (\w+),/) {
-		    my $dev = $1;
-		    my $type = $2;
-		    my $status = $3;
-		    $devices++;
-		    $bad{$dev} = 1;
-		    if ($status ne 'Online') {
-			push( @myinfo, "$dev ($type) $status");
-		    }
-		}
-
-		# rebuild?
-		if (/  Rebuild in Progress: .* \((\S+)\) (\d+%) completed/) {
-		    push( @myinfo, "$1 rebuild $2 complete" );
-		    delete $bad{$1};
-		}
-	    }
-	    if (keys %bad) {
-		$crit = 1;  # at least 1 is failed and !recovering
-	    } else {
-		$warn = 1;   # all are recovering
-	    }
-	}
-
-	push( @out, "Mylex $s: " . join(', ',@myinfo)) if @myinfo;
-    }
-}
-
-
-# icp vortex raid!
-if ( $pci =~ /intel/i) {
-    opendir(D,"/proc/scsi/gdth");
-    my @dev = readdir(D);
-    closedir D;
-    my @vortex;
-    for my $dev (@dev) {
-	next if $dev =~ /^\./;
-	my $read = `cat /proc/scsi/gdth/$dev`;
-	# my $read = `cat /tmp/asdf9.warn`;
-	my $cur;   # Logical | Physical | Host | Array
-	my @myinfo;
-#	print "dev $dev\n";
-	for $_ (split(/\n/,$read)) {
-	    chomp;
-	    if (/^\w/) {
-		# new section
-		($cur) = /^(\w+)/;
-#		print "cur = $cur\n";
-		next;
-	    }
-	    if ($cur eq 'Logical') {
-		my ($num,$status) = /Number:\s+(\d+)\s+Status:\s+(\w+)/;
-		next unless $status;
-		if ($status ne 'ok') {
-		    $warn = 1;
-		    #push( @myinfo, "Logical #$num $status" );
-		    unshift( @myinfo, "Logical #$num $status" );
-		}
-	    }
-	    if ($cur eq 'Array') {
-		my ($num,$status) = /Number:\s+(\d+)\s+Status:\s+(\w+)/;
-		next unless $status;
-		if ($status ne 'ready') {
-		    $warn = 1;
-		    #push( @myinfo, "Array #$num $status" );
-		    unshift( @myinfo, "Array #$num $status" );
-		}
-	    }
-	    if ($cur eq 'Host') {
-		if (/Number/) {
-		    $devices++;
-		}
-	    }
-	    if ($cur eq 'Controller') {
-		# push( @myinfo, $_ );
-		unshift( @myinfo, $_ );
-	    }
-	}
-	
-	if (@myinfo) {
-	    # push( @vortex, "dev $dev: " . join(', ', @myinfo) );
-	    # unshift( @vortex, "dev $dev: " . join(', ', @myinfo) );
-	    push( @vortex, "dev $dev: " . join(', ', $myinfo[0], $myinfo[1], $myinfo[2], $myinfo[3], $myinfo[4] ) );
-	    # $warn = 1;
-	}
-    }
-
-    if (@vortex) {
-	# push( @out, 'Vortex: ' . join('.   ', @vortex) );
-	push( @out, 'Vortex: ' . join('.   ', @vortex) );
-    }
-}
-# SAS megaraid
-if ( $pci =~ /LSI\ Logic/i) {
-    my $read = `/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0`;
-    for $_ (split(/\n/,$read)) {
-    	chomp;
-	# The line we care about is State: Optimal, if we don't have that, we've problems
-	if ($_ =~/^State\s*\:\s*(.*)/m) {
-            $devices++;
-	    #/^State\?:\s?(\w+)/;
-	    my $state = $1;
-	    next unless $state;
-	    if ($state ne 'Optimal') {
-		my $rebuild = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -i firmware`;
-			if ( $rebuild =~ /Rebuild/i) {
-				my $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
-				#my $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/grep -i rebuild`;
-				my $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/egrep -i \'\(rebuild\|not found\)\'`;
-				if ($rebuildstatus =~ /not found/m) {
-				   # check by device id instead of enclosure id if we get a not found error above
-				   $enclosure = `/usr/bin/sudo /usr/sbin/megacli -PDList -a0 | /bin/grep -B15 Rebuild | /bin/grep -e Enclosure -e Slot | /bin/grep -v position | /usr/bin/cut -d':' -f2 | /usr/bin/awk '{printf \$1\":\"}' | /usr/bin/awk -F ":" '{printf \$1":"\$2}'`;
-				   $rebuildstatus = `/usr/bin/sudo /usr/sbin/megacli -PDRbld -ShowProg -PhysDrv\[$enclosure\] -a0 | /bin/grep -i rebuild`;
-				}
-					for $_ ($rebuildstatus) {
-					$crit = 1;
-					push(@out,$_);
-					}
-			} else {
-	        $crit = 1;
-                my $virtual=`/usr/bin/sudo /usr/sbin/megacli -LDInfo -lall -a0 | grep -i failed -B6 | grep -i virtual | cut -d'(' -f1`;
-		push(@out, $virtual, $_);
-		}
-	    }
-	}	
-        # Should to catch the syntax or permissions errors this thing spits out
-	if (/ERROR/i) {
-	    $crit = 1;
-	    push(@out, $_);
-	foreach my $k (@out)
-	{
-		print $_;
-	}
-	}
-    }
-}
-
-# e3ware
-if ( $pci =~ /3ware/i) {
-	open(CLI,"/usr/bin/sudo /usr/sbin/tw_cli show|");
-	#my $read = `/usr/sbin/megacli -LDInfo -l0 -a0`;
-
-	$devices++;
-	my @controllers;
-	while (<CLI>) {
-		if ( $_ =~ /^c[0-9]/ ) {
-			my ($c) = split(/\s+/,$_);
-			push(@controllers,$c);
-		}
-	}
-	close(CLI);
-
-	foreach my $cont (@controllers) {
-		open(CLI,"/usr/bin/sudo /usr/sbin/tw_cli /$cont show|");
-		while (<CLI>) {
-			if ( $_ =~ /^u[0-9]+/ ) {
-				my @info = split(/\s+/,$_);
-				if ( $info[2] ne 'OK' ) {
-					if ( $info[2] =~ /REBUILDING/i) {
-						my $rebuildstatus = `/usr/bin/sudo /usr/sbin/tw_cli /$cont/$info[0] show | /bin/grep REBUILD | /bin/grep -v RAID-10`;
-							for $_ ($rebuildstatus) {
-							$crit = 1;
-							push(@out,$_);
-							}
-					} else {
-					$crit = 1;
-					push(@out,$_);
-					}
-				}
-			}
-			if ( $_ =~ /^p[0-9]+/ ) {
-				my @info = split(/\s+/,$_);
-				if ( $info[1] ne 'OK' ) {
-					$crit = 1;
-					push(@out,$_);
-				}
-			}
-		}
-	}	
-}
-
-#Areca
-
-if ( $pci =~ /areca/i) {
-                open(CLI,"sudo /usr/sbin/cli64 vsf info|");
-                while (<CLI>) {
-                        if ( $_ =~ /^\ \ [0-9]+/ ) {
-				$devices++;
-                                my @info = split(/\s+/,$_);
-				if ( $_ !~ /Normal/i) {
-                                        $crit = 1;
-                                        push(@out,$_);
-                                }
-                        }
-                }
-        }
-
-if ( $scsi =~ /LSI Logic/i) {
-                open(CLI,"sudo /usr/sbin/mpt-status | /usr/bin/head -1 |");
-                $devices++;
-                while (<CLI>) {
-                        if ( $_ =~ /^ioc/ ) {
-                                my @info = split(/\s+/,$_);
-                                if ( $info[10] ne 'OPTIMAL,' ) {
-                                        $crit = 1;
-                                        push(@out,$_);
-                                }
-                        }
-                }
-        }
-
-# show results
-my $result = 0;
-$result = 1 if $warn;
-$result = 2 if $crit;
-# print "warn = $warn crit = $crit\n";
-print $derp;
-my $out = "No raid devices found $pci";
-$out = "All $devices raid devices happy as clams" if $devices;
-if (@out) {
-    $out = join(';     ', @out);  
-}
-
-print "$out\n";
-exit $result;
diff --git a/roles/testnode/files/libexec/smart.sh b/roles/testnode/files/libexec/smart.sh
deleted file mode 100755
index 2f71a60..0000000
--- a/roles/testnode/files/libexec/smart.sh
+++ /dev/null
@@ -1,290 +0,0 @@
-#!/bin/bash
-# Description:	Bash script to check drive health using pending, uncorrectable,
-# and reallocated sector count
-#
-# Nagios return codes: 0 = OK; 1 = WARNING; 2 = CRITICAL; 3 = UNKNOWN
-# SMART Attribute Codes:
-#   5 = Reallocated
-#   187 = Reported Uncorrect
-#   197 = Pending
-#   198 = Uncorrectable Sector Count
-#
-# TO-DO: Add support for dynamic SMART attribute lookup.  For example,
-#        187 is reported for Seagate HDD and all SSDs but not Hitachi HDDs.
-#
-# See https://en.wikipedia.org/wiki/S.M.A.R.T.#ATA_S.M.A.R.T._attributes
-
-### Define global variables ###
-# total number of drives (or RAID slots) discovered
-numdrives=0
-# Number of failed, failing, and/or missing drives
-failingdrives=0
-# Fallback message for UNKNOWN return code output
-unknownmsg="Unknown error"
-# Return code for nagios (Default to SUCCESS)
-rc=0
-# Array of messages indicating drive health.  Output after nagios status.
-declare -a messages
-
-### Functions ###
-main ()
-{
-  preflight
-
-  if [ "$raid" = true ]
-  then
-    areca_smart
-    areca_failed
-  elif [ "$raid" = false ]
-  then
-    normal_smart
-  else
-    echo "ERROR - Could not determine if RAID present"
-    exit 3
-  fi
-
-  ## Return UNKNOWN if no drives found
-  if [ "$numdrives" -eq "0" ]
-  then
-    unknownmsg="No drives found!"
-    rc=3
-  fi
-  
-  ## Return code and service status for nagios
-  if [ "$rc" = 0 ]
-  then
-    echo "OK - All $numdrives drives healthy"
-  elif [ "$rc" = 1 ]
-  then
-    echo "WARNING - $failingdrives of $numdrives drives sick"
-  elif [ "$rc" = 2 ]
-  then
-    echo "CRITICAL - $failingdrives of $numdrives drives need replacing"
-  elif [ "$rc" = 3 ]
-  then
-    echo "UNKNOWN - $unknownmsg"
-  else
-    echo "ERROR - Got no return code"
-  fi
-  
-  ## Iterate through array of messages
-  # Nagios reads and displays the first line of output on the Services page.
-  # All individual messages about failed/failing disk statistics can be viewed
-  # on the individual system's SMART detail page in nagios.
-  for msg in "${messages[@]}"
-  do
-    echo "$msg"
-  done
-  
-  exit $rc
-}
-
-# Pre-flight checks
-preflight ()
-{
-  # Set raid var then check for cli64 command and bail if missing
-  if lspci | grep -qi areca
-  then
-    raid=true
-  else
-    raid=false
-  fi
-  
-  if [ "$raid" = true ] && ! [ -x "$(command -v cli64)" ]
-  then
-    echo "ERROR - cli64 command not found or is not executable"
-    exit 3
-  fi
-  
-  # Check for smartmontools and bail if missing
-  if ! [ -x "$(command -v smartctl)" ]
-  then
-    echo "ERROR - smartctl is not installed or is not executable"
-    echo "yum/apt-get install smartmontools"
-    exit 3
-  fi
-}
-
-# Gather smart data for drives behind Areca RAID controller
-areca_smart ()
-{
-  # Store output of cli64 to reduce repeated executions
-  cli64out=$(sudo cli64 disk info | grep -E "Slot#[[:digit:]]")
-  numdrives=$(echo "$cli64out" | wc -l)
-  # Loop through all disks not marked as 'N.A.' or 'Failed'
-  for slot in $(echo "$cli64out" | grep -v 'N.A.\|Failed' \
-  | grep -o "Slot#[[:digit:]]" | cut -c6-)
-  do
-    failed=false
-    # Determine if disk is JBOD or part of hardware RAID
-    if echo "$cli64out" | grep -E "Slot#$slot" | grep -q 'JBOD'
-    then
-      jbod=true
-    else
-      jbod=false
-    fi
-    output=$(sudo cli64 disk smart drv=$slot \
-    | grep -E "^  "5"|^"197"|^"198"" | awk '{ print $(NF-1) }' | tr '\n' ' ')
-    outputcount=$(echo $output | wc -w)
-    # Only continue if we received 3 SMART data points
-    if [ "$outputcount" = "3" ]
-    then
-      # Only do slot to drive letter matching once per bad JBOD
-      if [[ $output != "0 0 0 " ]] && [ "$jbod" = true ]
-      then
-        dl=$(areca_bay_to_letter $slot)
-      elif [ "$jbod" = false ]
-      then
-        dl="(RAID)"
-      fi
-      read reallocated pending uncorrect <<< $output
-      if [ "$reallocated" != "0" ]
-      then
-        messages+=("Drive $slot $dl has $reallocated reallocated sectors")
-        failed=true
-        # A small number of reallocated sectors is OK
-        if [ "$reallocated" -le 5 ]
-        then
-          rc=1 # Warn if <= 5
-        else
-          rc=2 # Crit if >5
-        fi
-      fi
-      if [ "$pending" != "0" ]
-      then
-        messages+=("Drive $slot $dl has $pending pending sectors")
-        failed=true
-        rc=2
-      fi
-      if [ "$uncorrect" != "0" ]
-      then
-        messages+=("Drive $slot $dl has $uncorrect uncorrect sectors")
-        failed=true
-        rc=2
-      fi
-    else
-      messages+=("Drive $slot returned $outputcount of 3 expected attributes")
-      unknownmsg="SMART data could not be read for one or more drives"
-      rc=3
-    fi
-    # Make sure drives with multiple types of bad sectors only get counted once
-    if [ "$failed" = true ]
-    then
-      let "failingdrives+=1"
-    fi
-  done
-}
-
-# Correlate Areca drive bay to drive letter
-areca_bay_to_letter ()
-{
-  # Get S/N according to RAID controller given argument $1 (slot #)
-  areca_serial=$(sudo cli64 disk info drv=$1 | grep 'Serial Number' \
-  | awk '{ print $NF }')
-  # Loop through and get S/N according to smartctl given drive name
-  for dl in $(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' \
-  | awk '{ print $NF }')
-  do
-    smart_serial=$(sudo smartctl -a /dev/$dl | grep "Serial number" \
-    | awk '{ print $NF }')
-    # If cli64 and smartctl find a S/N match, return drive letter
-    if [ "$areca_serial" = "$smart_serial" ]
-    then
-      echo "($dl)"
-    fi
-  done
-}
-
-# Tally missing and failed drives connected to Areca RAID
-areca_failed ()
-{
-  # Store output of cli64 to reduce repeated executions
-  cli64out=$(sudo cli64 disk info | grep -E "Slot#[[:digit:]]")
-  # Missing (N.A.) drives
-  for drive in $(echo "$cli64out" | grep -E "Slot#[[:digit:]]" \
-  | grep "N.A." | awk '{ print $1 }')
-  do
-    messages+=("Drive $drive is missing")
-    let "failingdrives+=1"
-    rc=2
-  done
-  # Hard failed drives
-  for drive in $(echo "$cli64out" | grep -E "Slot#[[:digit:]]" \
-  | grep 'Failed' | awk '{ print $1 }')
-  do
-    messages+=("Drive $drive failed")
-    let "failingdrives+=1"
-    rc=2
-  done
-}
-
-# Standard SATA/SAS drive smartctl check
-normal_smart ()
-{
-  # The grep regex will include drives named sdaa, for example
-  numdrives=$(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' | wc -l)
-  for l in $(cat /proc/partitions | grep -w 'sd[a-z]\|sd[a-z]\{2\}' \
-  | awk '{ print $NF }')
-  do
-    failed=false
-    output=$(sudo smartctl -a /dev/$l | grep -E "^  "5"|^"197"|^"198"" \
-    | awk '{ print $NF }' | tr '\n' ' ')
-    outputcount=$(echo $output | wc -w)
-    # Check if drive is SSD and set var accordingly
-    if sudo smartctl -i /dev/$l | grep -q 'Solid State Device'; then
-      is_ssd=true
-    else
-      is_ssd=false
-    fi
-    # Only continue if we received 3 SMART data points and drive is not SSD
-    if [ "$outputcount" = "3" ] && [ "$is_ssd" = false ]
-    then
-      read reallocated pending uncorrect <<< $output
-      if [ "$reallocated" != "0" ]
-      then
-        messages+=("Drive $l has $reallocated reallocated sectors")
-        failed=true
-        # A small number of reallocated sectors is OK
-        if [ "$reallocated" -le 5 ]
-        then
-          rc=1 # Warn if <= 5
-        else
-          rc=2 # Crit if >5
-        fi
-      fi
-      if [ "$pending" != "0" ]
-      then
-        messages+=("Drive $l has $pending pending sectors")
-        failed=true
-        rc=2
-      fi
-      if [ "$uncorrect" != "0" ]
-      then
-        messages+=("Drive $l has $uncorrect uncorrect sectors")
-        failed=true
-        rc=2
-      fi
-    elif [ "$outputcount" != "3" ] && [ "$is_ssd" = false ]
-    then
-      messages+=("Drive $l returned $outputcount of 3 expected attributes")
-      unknownmsg="SMART data could not be read for one or more drives"
-      rc=3
-    # Set no return code and assume any SSD is healthy for now
-    elif [ "$is_ssd" = true ]
-    then
-      messages+=("Drive $l is an SSD.  Not yet supported.")
-      rc=0
-    else
-      messages+=("Error processing data for drive $l")
-      rc=3
-    fi
-    # Make sure drives with multiple types of bad sectors only get counted once
-    if [ "$failed" = true ]
-    then
-      let "failingdrives+=1"
-    fi
-  done
-}
-
-## Call main() function
-main
diff --git a/roles/testnode/files/nagios/nrpe.te b/roles/testnode/files/nagios/nrpe.te
deleted file mode 100644
index c7bc886..0000000
--- a/roles/testnode/files/nagios/nrpe.te
+++ /dev/null
@@ -1,12 +0,0 @@
-module nrpe 1.0;
-
-require {
-	type fsadm_exec_t;
-	type nrpe_t;
-	type hwdata_t;
-	class file { read getattr open };
-}
-
-#============= nrpe_t ==============
-allow nrpe_t fsadm_exec_t:file getattr;
-allow nrpe_t hwdata_t:file { read getattr open };
diff --git a/roles/testnode/files/sbin/cli64 b/roles/testnode/files/sbin/cli64
deleted file mode 100644
index 7ef82de..0000000
Binary files a/roles/testnode/files/sbin/cli64 and /dev/null differ
diff --git a/roles/testnode/files/sbin/megacli b/roles/testnode/files/sbin/megacli
deleted file mode 100755
index 50bf00b..0000000
Binary files a/roles/testnode/files/sbin/megacli and /dev/null differ
diff --git a/roles/testnode/handlers/main.yml b/roles/testnode/handlers/main.yml
index a87e910..e3a24c6 100644
--- a/roles/testnode/handlers/main.yml
+++ b/roles/testnode/handlers/main.yml
@@ -25,8 +25,3 @@
   service:
     name: cron
     state: restarted
-
-- name: restart nagios-nrpe-server
-  service:
-    name: "{{ nrpe_service_name }}"
-    state: restarted
diff --git a/roles/testnode/tasks/disk_monitoring.yml b/roles/testnode/tasks/disk_monitoring.yml
deleted file mode 100644
index 2d06a17..0000000
--- a/roles/testnode/tasks/disk_monitoring.yml
+++ /dev/null
@@ -1,33 +0,0 @@
----
-# We use these scripts to check to see if any of our test nodes have bad disks
-
-- name: Upload megacli and cli64 for raid monitoring and smart.pl to /usr/sbin/.
-  copy:
-    src: "../files/sbin/{{ item }}"
-    dest: "/usr/sbin/{{ item }}"
-    owner: root
-    group: root
-    mode: 0755
-  with_items:
-    - megacli
-    - cli64
-
-- name: Create /usr/libexec.
-  file:
-    path: /usr/libexec
-    owner: root
-    group: root
-    mode: 0755
-    state: directory
-
-- name: Upload custom netsaint scripts for raid/disk/smart/monitoring to /usr/libexec/.
-  copy:
-    src: "../files/libexec/{{ item }}"
-    dest: "/usr/libexec/{{ item }}"
-    owner: root
-    group: root
-    mode: 0755
-  with_items:
-    - smart.sh
-    - raid.pl
-    - diskusage.pl
diff --git a/roles/testnode/tasks/main.yml b/roles/testnode/tasks/main.yml
index e04368e..033d386 100644
--- a/roles/testnode/tasks/main.yml
+++ b/roles/testnode/tasks/main.yml
@@ -72,29 +72,6 @@
   tags:
     - cpan
 
-# upload custom disk monitoring scripts
-- include: disk_monitoring.yml
-  tags:
-    - monitoring-scripts
-
-# configure nagios
-- include: nagios.yml
-  tags:
-    - nagios
-
-- name: Get SELinux status
-  command: getenforce
-  register: selinux_status
-  when: ansible_pkg_mgr == "yum"
-  tags:
-    - nagios
-
-# configure selinux for nagios
-- include: nrpe-selinux.yml
-  when: selinux_status is defined and selinux_status.stdout != "Disabled"
-  tags:
-    - nagios
-
 # configure ntp
 - include: ntp.yml
   tags:
diff --git a/roles/testnode/tasks/nagios.yml b/roles/testnode/tasks/nagios.yml
deleted file mode 100644
index 259a229..0000000
--- a/roles/testnode/tasks/nagios.yml
+++ /dev/null
@@ -1,54 +0,0 @@
----
-- name: Upload nagios sudoers.d for raid utilities.
-  template:
-    src: nagios/90-nagios
-    dest: /etc/sudoers.d/90-nagios
-    owner: root
-    group: root
-    mode: 0440
-    validate: visudo -cf %s
-
-- name: Configure nagios nrpe settings (Ubuntu)
-  lineinfile:
-    dest: /etc/default/{{ nrpe_service_name }}
-    regexp: "^DAEMON_OPTS"
-    line: "DAEMON_OPTS=\"--no-ssl\""
-  when: ansible_pkg_mgr == "apt"
-
-- name: Configure nagios nrpe settings (RHEL/CentOS)
-  lineinfile:
-    dest: /etc/sysconfig/{{ nrpe_service_name }}
-    regexp: "^NRPE_SSL_OPT"
-    line: "NRPE_SSL_OPT=\"-n\""
-  when: ansible_pkg_mgr == "yum"
-
-- name: Check firewalld status
-  command: systemctl status firewalld
-  register: firewalld
-  ignore_errors: true
-  no_log: true
-  when: ansible_pkg_mgr == "yum"
-
-- name: Open nrpe port if firewalld enabled
-  firewalld:
-    port: 5666/tcp
-    state: enabled
-    permanent: yes
-    immediate: yes
-  when: ansible_pkg_mgr == "yum" and (firewalld is defined and firewalld.stdout.find('running') != -1)
-
-- name: Upload nagios nrpe config.
-  template:
-    src: nagios/nrpe.cfg 
-    dest: /etc/nagios/nrpe.cfg
-    owner: root
-    group: root
-    mode: 0644
-  notify:
-    - restart nagios-nrpe-server
-
-- name: Make sure nagios nrpe service is running.
-  service:
-    name: "{{ nrpe_service_name }}"
-    enabled: yes
-    state: started
diff --git a/roles/testnode/tasks/nrpe-selinux.yml b/roles/testnode/tasks/nrpe-selinux.yml
deleted file mode 100644
index 877aa2e..0000000
--- a/roles/testnode/tasks/nrpe-selinux.yml
+++ /dev/null
@@ -1,44 +0,0 @@
----
-- name: nrpe - Install semanage python bindings
-  yum:
-    pkg: libsemanage-python
-    state: installed
-
-- name: nrpe - Install SELinux tools
-  yum:
-    pkg: policycoreutils-python
-    state: installed
-
-- name: nrpe - Ensure SELinux policy is up to date
-  yum:
-    pkg: selinux-policy-targeted
-    state: latest
-
-- name: nrpe - Set SELinux boolean nagios_run_sudo true
-  seboolean:
-    name: nagios_run_sudo
-    state: yes
-    persistent: yes
-
-- name: nrpe - Remove SELinux policy package
-  command: semodule -r nrpe
-  failed_when: false
-
-- name: nrpe - Copy SELinux type enforcement file
-  copy:
-    src: nagios/nrpe.te
-    dest: /tmp/nrpe.te
-
-- name: nrpe - Compile SELinux module file
-  command: checkmodule -M -m -o /tmp/nrpe.mod /tmp/nrpe.te
-
-- name: nrpe - Build SELinux policy package
-  command: semodule_package -o /tmp/nrpe.pp -m /tmp/nrpe.mod
-
-- name: nrpe - Load SELinux policy package
-  command: semodule -i /tmp/nrpe.pp
-
-- name: nrpe - Remove temporary files
-  file:
-    path: /tmp/nrpe.*
-    state: absent
diff --git a/roles/testnode/templates/nagios/90-nagios b/roles/testnode/templates/nagios/90-nagios
deleted file mode 100644
index 34326fb..0000000
--- a/roles/testnode/templates/nagios/90-nagios
+++ /dev/null
@@ -1,2 +0,0 @@
-## {{ ansible_managed }}
-{{ nrpe_user }} ALL=NOPASSWD: /usr/sbin/megacli, /usr/sbin/cli64, /usr/sbin/smartctl, /usr/sbin/smartctl
diff --git a/roles/testnode/templates/nagios/nrpe.cfg b/roles/testnode/templates/nagios/nrpe.cfg
deleted file mode 100644
index 84435c8..0000000
--- a/roles/testnode/templates/nagios/nrpe.cfg
+++ /dev/null
@@ -1,29 +0,0 @@
-# {{ ansible_managed }}
-log_facility=daemon
-pid_file=/var/run/nagios/nrpe.pid
-server_port=5666
-nrpe_user={{ nrpe_user }}
-nrpe_group={{ nrpe_group }}
-
-# These should eventually be in a secrets group_var
-# 172. address is sepia nagios server
-# 10. address is octo nagios server
-allowed_hosts=127.0.0.1,172.21.0.33,10.8.0.8
-dont_blame_nrpe=0
-debug=0
-command_timeout=60
-connection_timeout=300
-
-command[check_users]={{ nagios_plugins_directory }}/check_users --warning=5 --critical=10
-command[check_load]={{ nagios_plugins_directory }}/check_load --percpu --warning=1.5,1.4,1.3 --critical=2.0,1.9,1.8
-command[check_hda1]={{ nagios_plugins_directory }}/check_disk --warning=20% --critical=10% --partition=/dev/hda1
-command[check_root]={{ nagios_plugins_directory }}/check_disk --warning=10% --critical=5% --units=GB --path=/
-command[check_zombie_procs]={{ nagios_plugins_directory }}/check_procs --warning=5 --critical=10 --state=Z
-command[check_total_procs]={{ nagios_plugins_directory }}/check_procs --warning=300 --critical=500
-command[check_raid]=/usr/libexec/raid.pl
-command[check_disks]=/usr/libexec/diskusage.pl 90 95
-command[check_smart]=/usr/libexec/smart.sh
-
-include=/etc/nagios/nrpe_local.cfg
-
-include_dir=/etc/nagios/nrpe.d/
diff --git a/roles/testnode/vars/apt_systems.yml b/roles/testnode/vars/apt_systems.yml
index 15a1225..d03a888 100644
--- a/roles/testnode/vars/apt_systems.yml
+++ b/roles/testnode/vars/apt_systems.yml
@@ -2,10 +2,6 @@
 ntp_service_name: ntp
 ssh_service_name: ssh
 nfs_service: nfs-kernel-server
-nrpe_service_name: nagios-nrpe-server
-nrpe_user: nagios
-nrpe_group: nagios
-nagios_plugins_directory: /usr/lib/nagios/plugins
 
 ceph_packages_to_remove:
   - ceph
diff --git a/roles/testnode/vars/centos_6.yml b/roles/testnode/vars/centos_6.yml
index 596eaac..a94e7af 100644
--- a/roles/testnode/vars/centos_6.yml
+++ b/roles/testnode/vars/centos_6.yml
@@ -99,8 +99,6 @@ packages:
   # for java bindings, hadoop, etc.
   - java-1.7.0-openjdk-devel
   - junit4
-  # for disk/etc monitoring
-  - smartmontools
   # for nfs
   - nfs-utils
 
@@ -121,6 +119,3 @@ epel_packages:
   - python-virtualenv
   # for setting BIOS settings
   - smbios-utils
-  # for nagios monitoring
-  - nrpe
-  - nagios-plugins-all
diff --git a/roles/testnode/vars/centos_7.yml b/roles/testnode/vars/centos_7.yml
index 9f169a9..8e33ab9 100644
--- a/roles/testnode/vars/centos_7.yml
+++ b/roles/testnode/vars/centos_7.yml
@@ -83,8 +83,6 @@ packages:
   # for java bindings, hadoop, etc.
   - java-1.6.0-openjdk-devel
   - junit4
-  # for disk/etc monitoring
-  - smartmontools
   # for nfs
   - nfs-utils
   # for xfstests
@@ -107,6 +105,3 @@ epel_packages:
   - bonnie++
   # for json_xs to investigate JSON by hand
   - perl-JSON-XS
-  # for nagios monitoring
-  - nrpe
-  - nagios-plugins-all
diff --git a/roles/testnode/vars/debian_7.yml b/roles/testnode/vars/debian_7.yml
index 568dd42..a881517 100644
--- a/roles/testnode/vars/debian_7.yml
+++ b/roles/testnode/vars/debian_7.yml
@@ -84,10 +84,6 @@ packages:
   - default-jdk
   - junit4
   ###
-  # for disk/etc monitoring
-  - smartmontools
-  - nagios-nrpe-server
-  ###
   # for samba testing
   - cifs-utils
   ###
diff --git a/roles/testnode/vars/debian_8.yml b/roles/testnode/vars/debian_8.yml
index 16edf7f..9038c28 100644
--- a/roles/testnode/vars/debian_8.yml
+++ b/roles/testnode/vars/debian_8.yml
@@ -77,10 +77,6 @@ packages:
   - default-jdk
   - junit4
   ###
-  # for disk/etc monitoring
-  - smartmontools
-  - nagios-nrpe-server
-  ###
   # for samba testing
   - cifs-utils
   ###
diff --git a/roles/testnode/vars/fedora_22.yml b/roles/testnode/vars/fedora_22.yml
index 2c911b6..31170f4 100644
--- a/roles/testnode/vars/fedora_22.yml
+++ b/roles/testnode/vars/fedora_22.yml
@@ -68,10 +68,6 @@ packages:
   # for java bindings, hadoop, etc.
   - java-1.8.0-openjdk-devel
   - junit
-  # for disk/etc monitoring
-  - nrpe
-  - nagios-plugins-all
-  - smartmontools
   # for nfs
   - nfs-utils
   # python-pip is installed via roles/testnode/tasks/pip.yml on other rpm-based distros
diff --git a/roles/testnode/vars/redhat_6.yml b/roles/testnode/vars/redhat_6.yml
index e274d36..422b8eb 100644
--- a/roles/testnode/vars/redhat_6.yml
+++ b/roles/testnode/vars/redhat_6.yml
@@ -86,8 +86,6 @@ packages:
   # for java bindings, hadoop, etc.
   - java-1.6.0-openjdk-devel
   - junit4
-  # for disk/etc monitoring
-  - smartmontools
   # for nfs
   - nfs-utils
 
@@ -108,8 +106,5 @@ epel_packages:
   - python-virtualenv
   # for setting BIOS settings
   - smbios-utils
-  # for nagios monitoring
-  - nrpe
-  - nagios-plugins-all
 
 nfs_service: nfs
diff --git a/roles/testnode/vars/redhat_7.yml b/roles/testnode/vars/redhat_7.yml
index a7bd3c7..1cb04f6 100644
--- a/roles/testnode/vars/redhat_7.yml
+++ b/roles/testnode/vars/redhat_7.yml
@@ -67,7 +67,6 @@ packages:
   - perl-XML-Twig
   - java-1.6.0-openjdk-devel
   - junit4
-  - smartmontools
   - nfs-utils
   # for xfstests
   - ncurses-devel
@@ -86,8 +85,5 @@ epel_packages:
   - perl-JSON-XS
   - leveldb
   - xmlstarlet
-  # for nagios monitoring
-  - nrpe
-  - nagios-plugins-all
 
 nfs_service: nfs-server
diff --git a/roles/testnode/vars/ubuntu.yml b/roles/testnode/vars/ubuntu.yml
index 52d3478..31cb4b8 100644
--- a/roles/testnode/vars/ubuntu.yml
+++ b/roles/testnode/vars/ubuntu.yml
@@ -83,10 +83,6 @@ common_packages:
   - tgt
   - open-iscsi
   ###
-  # for disk/etc monitoring
-  - smartmontools
-  - nagios-nrpe-server
-  ###
   # for samba testing
   - cifs-utils
   # for Static IP
diff --git a/roles/testnode/vars/yum_systems.yml b/roles/testnode/vars/yum_systems.yml
index e6652a6..433bdf5 100644
--- a/roles/testnode/vars/yum_systems.yml
+++ b/roles/testnode/vars/yum_systems.yml
@@ -1,10 +1,6 @@
 ---
 ntp_service_name: ntpd
 ssh_service_name: sshd
-nrpe_service_name: nrpe
-nrpe_user: nrpe
-nrpe_group: nrpe
-nagios_plugins_directory: /usr/lib64/nagios/plugins
 
 # ceph packages that we ensure do not exist
 ceph_packages_to_remove: