#!/bin/bash
# DBA Script
# Purpose: System alert generator.
# Version: 2018.10.01
# Dependencies: inc_system.sh
######################################
# Preamble: 2018.10.01 #
######################################
# Export every variable assigned from here on (set -a) so child processes
# can see them.
set -a
DIR_SCRIPTS="/u01/app/scripts"

# DIR_LOGS, DIR_TMP, MAINT_WINDOW and the helpers used further down (log,
# linesep, now, elapse, SysVars_show) are not defined in this file; they are
# presumably supplied by the include. Without it the log paths would collapse
# to "/<name>.sess.log", so stop early instead of running half-configured.
if [[ ! -r "$DIR_SCRIPTS/inc_system.sh" ]]; then
  printf 'Cannot read %s - Exiting\n' "$DIR_SCRIPTS/inc_system.sh" >&2
  exit 1
fi
source "$DIR_SCRIPTS/inc_system.sh"

# Script name with and without its extension; the latter prefixes log files.
sFullName=$(basename "$0")
me=${sFullName%.*}

# Session log (truncated on every run) and cumulative history log.
sSLog="$DIR_LOGS/$me.sess.log"
sHLog="$DIR_LOGS/$me.hist.log"
: > "$sSLog"

# Do nothing while a maintenance window is flagged.
if [[ $MAINT_WINDOW -eq 1 ]]; then
  printf "Maintenance Window Detected - Exiting\n"
  exit
fi
# User Vars
# --- Thresholds ---
# Free RAM (mb): alert when the measured value is at or below this. Ex: 1024
usrRAM=64
# CPU (%): alert when utilisation is above this. Ex: 90
usrCPU=25
# File systems to check. Ex: (/ /u01 /u02)
usrFileSystems=(
  /
  /u01
  /u02
  /u03
)
# Free space (gb): alert when a file system is at or below this. Ex: 10
usrFreeSpace=10

# --- Network targets ---
# Hosts that must answer ping.
usrSystems=(mlbltdnasolov01)
# Comma delimited list of IPs (no spaces) for the packet size check.
usrPktChkList="10.230.2.45"
# Default 56 translates to 64 with the 8 byte ICMP header data.
# For RAC PrivIp use: 8972
usrPktSize=56

# --- Email ---
# Minutes before an email alert is sent again (inhibits email alert spam).
usrEmailDelay=30
# Recipient(s). Values seen so far: "OpNetTeam@sccu.com" "michaele@sccu.com"
usrEmailList="michaele@sccu.com"

# --- Check toggles (1 = run the check) ---
optCPU=1
optRAM=1
optFileSys=1
optPings=1
optPktSize=1
optNTP=1
######################################
# Init Script Actions\Functions\Vars #
######################################
# Vars
# Comma becomes the word-splitting separator for the rest of the script; the
# packet size check depends on it to split usrPktChkList.
IFS=","
fAlerts="$DIR_LOGS/$me.alerts.log"

# Free RAM in mb: column 4 of the "Mem" row printed by free -m.
sRAM=$(free -m | awk '/^Mem/ {print $4}')

# CPU utilisation: 100 minus the %idle column of mpstat's "all" row.
# The %idle column is located by name, and the "all" row is accepted in
# field 2 or 3, because the timestamp takes one field in 24-hour/C locales
# and two fields ("hh:mm:ss AM") in 12-hour locales.
# NOTE(review): mpstat is called without an interval; per its manual that
# appears to report averages since boot rather than current load - confirm.
sCPU=$(mpstat | awk '
  /%idle/ { for (i = 1; i <= NF; i++) if ($i == "%idle") idleCol = i }
  idleCol && ($2 == "all" || $3 == "all") { print 100 - $idleCol }
')
# round VALUE
# Prints VALUE rounded to the nearest whole number (adds 0.5, then truncates
# with awk's int(), so it is meant for non-negative input). An empty VALUE
# prints 0. The value is kept in a local so nothing leaks into the
# (auto-exported) global scope.
round () {
  local value=$1
  printf '%s\n' "$value" | awk '{print int($1+0.5)}'
}
# Timestamp of this run, formatted YYYYMMDD_HHMM.
sDateSN=$(date "+%Y%m%d_%H%M")

# Flag file whose modification time records when the last alert email went
# out. When it does not exist yet, create it with an old timestamp so the
# first alert is not held back by the email delay.
fLastEmail="$DIR_TMP/$me.email.flg"
if [[ ! -f "$fLastEmail" ]]; then
  touch -t 201712151500 "$fLastEmail"
fi
# Functions
# alert MESSAGE
# Writes "<timestamp>,MESSAGE" to stdout and appends it to the session log,
# then emails MESSAGE to usrEmailList unless an alert email was already sent
# within the last usrEmailDelay minutes (tracked by the mtime of fLastEmail).
# Globals read: sSLog, fLastEmail, usrEmailDelay, usrEmailList, me, HOSTNAME
# Uses:         now (timestamp helper defined outside this file), mail
alert() {
  local msg=$1
  local nowEpoch lastEpoch

  # MESSAGE is passed as data, never as the printf format, so a literal "%"
  # in it is printed unchanged.
  printf '%s,%s\n' "$(now)" "$msg" | tee -a "$sSLog"

  nowEpoch=$(date +%s)
  # A missing or unreadable flag file counts as "never sent".
  lastEpoch=$(stat -L --format %Y "$fLastEmail" 2>/dev/null) || lastEpoch=0

  if (( nowEpoch - lastEpoch > usrEmailDelay * 60 )); then
    mail -s "$me: $HOSTNAME" "$usrEmailList" <<< "$msg"
    touch "$fLastEmail"
  fi
}
######################################
# Start #
######################################
# Clear the terminal, record the start of the run in the session log, then
# call the separator and system-variable display helpers (log, linesep and
# SysVars_show are defined outside this file).
clear
log "$sSLog" "$sFullName Started"
linesep
SysVars_show
# CPU
# Log the measured utilisation, then alert when it exceeds usrCPU. Both
# sides are rounded to whole percent before the integer comparison.
if (( optCPU == 1 )); then
  log "$sSLog" "CPU: $sCPU pct ($usrCPU)"
  nCPU1=$(round "$sCPU")
  nCPU2=$(round "$usrCPU")
  if (( nCPU1 > nCPU2 )); then
    alert "$(hostname) CPU utilization $sCPU pct."
  fi
fi
# RAM
# Log the free-memory reading, then alert when it is at or below usrRAM.
if [[ $optRAM -eq 1 ]]; then
  log "$sSLog" "RAM: $sRAM mb ($usrRAM)"
  if [[ ! $sRAM =~ ^[0-9]+$ ]]; then
    # An empty or non-numeric reading would make the comparison fail with a
    # shell error; record that the check was skipped instead.
    log "$sSLog" " RAM: free memory reading unavailable - check skipped"
  elif [[ $sRAM -le $usrRAM ]]; then
    alert "$(hostname) $sRAM mb free RAM."
  fi
fi
# File System
# For every entry in usrFileSystems: alert when it is not mounted or its free
# space cannot be read, and alert when free space (whole gb, truncated) is at
# or below usrFreeSpace.
if [[ $optFileSys -eq 1 ]]; then
  log "$sSLog" "File System ($usrFreeSpace)"
  for fs in "${usrFileSystems[@]}"; do
    # Compare against the mount-point column exactly; a substring match
    # would always succeed for "/" and would accept "/u010" for "/u01".
    if mount | awk -v fs="$fs" '$3 == fs { found = 1 } END { exit !found }'; then
      # Check Free Space (available 1K blocks from the last df line)
      sFreeSpace=$(df -Pk "$fs" 2>/dev/null | tail -1 | awk '{print $4}')
      if [[ $sFreeSpace =~ ^[0-9]+$ ]]; then
        sFSgb=$(( sFreeSpace / 1024 / 1024 ))
        log "$sSLog" " $fs: $sFSgb gb"
        if [[ $sFSgb -le $usrFreeSpace ]]; then
          alert "$(hostname) $fs has $sFSgb gigabytes free."
        fi
      else
        # Mounted but df produced no usable figure
        alert "$(hostname) cannot access $fs."
      fi
    else
      # Cannot Access Alert
      alert "$(hostname) cannot access $fs."
    fi
  done
fi
# Dependent System Checks
# Ping every host in usrSystems (2 packets, quiet); alert when a host does
# not answer, otherwise log it as OK.
if [[ $optPings -eq 1 ]]; then
  log "$sSLog" "Dependent Systems Check"
  for system in "${usrSystems[@]}"; do
    if ping -q -c 2 "$system" > /dev/null 2>&1; then
      log "$sSLog" " $system: OK"
    else
      alert "$(hostname) cannot ping $system"
    fi
  done
fi
# Packet Size Checks (MTU etc.)
# Send one ping of usrPktSize bytes with "-M do" to each address in
# usrPktChkList. The list expansion is intentionally unquoted: it is split
# on commas because IFS was set to "," earlier in the script.
if (( optPktSize == 1 )); then
  log "$sSLog" "Packet Size Check"
  for i in $usrPktChkList; do
    if ping "$i" -c 1 -M do -s "$usrPktSize" > /dev/null 2>&1; then
      log "$sSLog" " $i: OK"
    else
      alert " $i: Cannot connect using packet size of $usrPktSize"
    fi
  done
fi
# NTP
# ntpstat's exit status decides the outcome: zero is logged as OK, anything
# else raises an alert.
if (( optNTP == 1 )); then
  if ntpstat > /dev/null 2>&1; then
    log "$sSLog" "NTP OK"
  else
    alert "$(hostname) NTP not in sync."
  fi
fi
######################################
# End #
######################################
# Record the end of the run (elapse is a helper defined outside this file).
log "$sSLog" "$sFullName Ended [Elapse Time: $(elapse)]"

# Append a separator line followed by this run's session log to the history.
{
  echo $(linesep "=")
  cat "$sSLog"
} >> "$sHLog"

# Keep only the newest 32768 lines of the history log.
tail -n 32768 "$sHLog" > "$sHLog.tmp"
mv "$sHLog.tmp" "$sHLog"