#!/bin/bash
# DBA Script
# Purpose: System alert generator.
# Version: 2018.10.01
# Dependencies: inc_system.sh
#
# Compares CPU utilisation, free RAM and file-system free space against the
# user thresholds below and raises an alert (session log + throttled email)
# for every breach.
#
# NOTE(review): log, linesep, now, SysVars_show and the variables DIR_LOGS,
# DIR_TMP and MAINT_WINDOW are not defined in this file; they are presumably
# provided by inc_system.sh - confirm.

######################################
# Preamble: 2018.10.01               #
######################################
set -a
DIR_SCRIPTS="/u01/app/scripts"
# Without the include, the log paths below would resolve to "/<name>.sess.log"
# and every helper call would fail, so stop early instead of limping on.
source "$DIR_SCRIPTS/inc_system.sh" || {
  printf 'Cannot load %s - exiting\n' "$DIR_SCRIPTS/inc_system.sh" >&2
  exit 1
}
sFullName=$(basename "$0"); me=${sFullName%.*}
sSLog="$DIR_LOGS/$me.sess.log"; sHLog="$DIR_LOGS/$me.hist.log"
: > "$sSLog"    # start every run with an empty session log
if [[ $MAINT_WINDOW -eq 1 ]]; then printf "Maintenance Window Detected - Exiting\n"; exit; fi

# User Vars
usrRAM=64;                            # If RAM (mb) below this value alert triggered. Ex: 1024
usrCPU=25;                            # If CPU (%) above this value alert triggered. Ex: 90
usrFileSystems=(/ /u01 /u02 /u03);    # Space delimited list of file systems. Ex: (/ /u01 /u02)
usrFreeSpace=10;                      # If usrFileSystems below this value (gb) alert triggered. Ex: 10
usrSystems=(mlbltdnasolov01)
usrPktChkList="10.230.2.45";          # Comma delimited list of IPs (no spaces).
usrPktSize=56;                        # Default 56 translates to 64 with the 8 byte ICMP header data. For RAC PrivIp use: 8972
usrEmailDelay=30;                     # How many minutes before email alert sent again (inhibits email alert spam)
usrEmailList="michaele@sccu.com";     # "OpNetTeam@sccu.com" "michaele@sccu.com"

optCPU=1; optRAM=1; optFileSys=1; optPings=1; optPktSize=1; optNTP=1;

######################################
# Init Script Actions\Functions\Vars #
######################################
# Vars
# Kept global because later sections split comma-delimited lists on it.
# Every expansion in this section is quoted so it does not depend on IFS.
IFS=","
fAlerts="$DIR_LOGS/$me.alerts.log"
# Column 4 of the "Mem" row of `free -m` (the "free" column), in MB.
sRAM=$(free -m | awk '/^Mem/ {print $4}')
# Locate the %idle column from the mpstat header, then report 100 - idle
# for the "all" CPU row.
sCPU=$(mpstat | awk '$3 ~ /CPU/ { for(i=1;i<=NF;i++) { if ($i ~ /%idle/) field=i } } $3 ~ /all/ { print 100 - $field }')
sDateSN=$(date "+%Y%m%d_%H%M")
fLastEmail="$DIR_TMP/$me.email.flg"
# Seed the throttle flag with an old timestamp so the first alert is mailed.
[[ -f "$fLastEmail" ]] || touch -t 201712151500 "$fLastEmail"

# Functions

# round VALUE
# Prints VALUE rounded half-up to an integer (an empty value prints 0).
round () {
  printf '%s\n' "$1" | awk '{print int($1+0.5)}'
}

# alert MESSAGE
# Writes "<timestamp>,MESSAGE" to stdout and appends it to the session log.
# Mails MESSAGE to usrEmailList only when more than usrEmailDelay minutes
# have passed since the flag file was last touched, then refreshes the flag.
alert() {
  local sMsg=$1
  local nNow nLast
  # The message is passed as data, never as the format string, so a literal
  # '%' or backslash in it cannot corrupt the output.
  printf '%s,%s\n' "$(now)" "$sMsg" | tee -a "$sSLog"
  nNow=$(date +%s)
  # If the flag file cannot be read, treat the last mail as infinitely old.
  nLast=$(stat -L --format %Y "$fLastEmail") || nLast=0
  if (( nNow - ${nLast:-0} > usrEmailDelay * 60 )); then
    mail -s "$me: $HOSTNAME" "$usrEmailList" <<< "$sMsg"
    touch "$fLastEmail"
  fi
}

######################################
# Start                              #
######################################
clear; log "$sSLog" "$sFullName Started"; linesep
# NOTE(review): the collapsed source read "linesep SysVars_show"; it is
# treated here as two separate commands - confirm against inc_system.sh.
SysVars_show

# CPU
if [[ $optCPU -eq 1 ]]; then
  log "$sSLog" "CPU: $sCPU pct ($usrCPU)"
  # mpstat reports fractions; compare on rounded integers.
  nCPU1=$(round "$sCPU"); nCPU2=$(round "$usrCPU")
  if [[ $nCPU1 -gt $nCPU2 ]]; then
    alert "$(hostname) CPU utilization $sCPU pct."
  fi
fi

# RAM
if [[ $optRAM -eq 1 ]]; then
  log "$sSLog" "RAM: $sRAM mb ($usrRAM)"
  # Only compare when free(1) produced a number; an empty value would
  # otherwise evaluate as 0 and raise a bogus alert.
  if [[ $sRAM =~ ^[0-9]+$ ]] && (( sRAM <= usrRAM )); then
    alert "$(hostname) $sRAM mb free RAM."
  fi
fi

# File System
if [[ $optFileSys -eq 1 ]]; then
  log "$sSLog" "File System ($usrFreeSpace)"
  for fs in "${usrFileSystems[@]}"; do
    # Field 3 of `mount` output is the mount point. Compare it exactly: a
    # substring match would let "/" match every line and "/u01" match "/u010".
    if mount | awk -v mp="$fs" '$3 == mp { found = 1 } END { exit !found }'; then
      # Check Free Space: "Available" column of df, forced to 1K blocks.
      sFreeSpace=$(df -Pk "$fs" | tail -n 1 | awk '{print $4}')
      sFSgb=$(( ${sFreeSpace:-0} / 1024 / 1024 ))    # KB -> GB, truncated
      log "$sSLog" " $fs: $sFSgb gb"
      if [[ $sFSgb -le $usrFreeSpace ]]; then
        alert "$(hostname) $fs has $sFSgb gigabytes free."
      fi
    else
      # Cannot Access Alert
      alert "$(hostname) cannot access $fs."
    fi
  done
fi
# Dependent System Checks
# Alerts for every host in usrSystems that does not answer two pings.
if [[ $optPings -eq 1 ]]; then
  log "$sSLog" "Dependent Systems Check"
  for system in "${usrSystems[@]}"; do
    if ping -q -c 2 "$system" > /dev/null 2>&1; then
      log "$sSLog" " $system: OK"
    else
      alert "$(hostname) cannot ping $system"
    fi
  done
fi

# Packet Size Checks (MTU etc.)
# Sends one non-fragmentable ping (-M do) of usrPktSize bytes to each address;
# a failure suggests the path cannot carry that packet size.
if [[ $optPktSize -eq 1 ]]; then
  log "$sSLog" "Packet Size Check"
  # Split the comma-delimited list locally so this loop does not depend on
  # whatever IFS happens to be set to globally.
  IFS=',' read -r -a aPktHosts <<< "$usrPktChkList"
  for i in "${aPktHosts[@]}"; do
    if ping -c 1 -M do -s "$usrPktSize" "$i" > /dev/null 2>&1; then
      log "$sSLog" " $i: OK"
    else
      alert " $i: Cannot connect using packet size of $usrPktSize"
    fi
  done
fi

# NTP
# ntpstat exits 0 only when the clock is synchronised.
if [[ $optNTP -eq 1 ]]; then
  if ntpstat > /dev/null 2>&1; then
    log "$sSLog" "NTP OK"
  else
    alert "$(hostname) NTP not in sync."
  fi
fi

######################################
# End                                #
######################################
log "$sSLog" "$sFullName Ended [Elapse Time: $(elapse)]"
# Append this session to the history log behind a separator line, then trim
# the history to its most recent 32768 lines.
printf '%s\n' "$(linesep "=")" >> "$sHLog"
cat "$sSLog" >> "$sHLog"
tail -n 32768 "$sHLog" > "$sHLog.tmp"; mv "$sHLog.tmp" "$sHLog"