назад к заметкам

Мониторинг состояния системы

#!/bin/bash
# 
# hugin.sh is a simple bash script that notifies the admin by email when the load average crosses a certain limit
# version 1.2
#
# Copyright (C) 2005 nixCraft project
# Copyright (C) 2012 farmal.in
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
# Set up limit below: the alert is sent when the load figure compared
# further down is strictly greater than this value
NOTIFY="4.0"

# admin user email id (recipient handed to mail)
EMAIL="admins"

# Subject for email
SUBJECT="Alert $(hostname) load average"

# -----------------------------------------------------------------
# OS specific tweaks; do not change anything below ;)
#
# Sets:
#   OS       - kernel name as printed by uname
#   TRUE     - the string the load comparison yields when the limit is crossed
#   TEMPFILE - scratch file that collects the mail body
#   FTEXT    - awk field separator (a regex) that precedes the load figures
#              in the output of uptime
OS="$(uname)"
TRUE="1"
case "$OS" in
        FreeBSD)
                # ${0##*/} is the script's base name without spawning basename
                # and without word-splitting a path that contains spaces.
                TEMPFILE="$(mktemp "/tmp/${0##*/}.tmp.XXXXXX")"
                FTEXT='load averages:'
                ;;
        Linux)
                TEMPFILE="$(mktemp)"
                FTEXT='load average:'
                ;;
        *)
                # Any other OS: previously TEMPFILE and FTEXT stayed unset here,
                # which broke every later redirection. Use an explicit template
                # and a separator regex that accepts both spellings of the label.
                TEMPFILE="$(mktemp "${TMPDIR:-/tmp}/${0##*/}.tmp.XXXXXX")"
                FTEXT='load averages?:'
                ;;
esac

# Stop early if the scratch file could not be created.
if [ -z "$TEMPFILE" ] || [ ! -f "$TEMPFILE" ]; then
        echo "${0##*/}: could not create a temporary file" >&2
        exit 1
fi

# Remove the scratch file on every exit path, including interruption.
trap 'rm -f -- "$TEMPFILE"' EXIT
 
# parse_load_averages UPTIME_LINE
# Prints the figures that follow the "load average(s):" label of one line of
# uptime output, separated by single spaces (e.g. "0.52 1.04 12.30").
# Both ", " separated and blank separated figures are accepted.
# Uses $FTEXT as the awk field separator when it is set; otherwise falls back
# to a regex matching either spelling of the label.
parse_load_averages() {
        printf '%s\n' "$1" |
                awk -F "${FTEXT:-load averages?:}" '{
                        gsub(/[ ,]+/, " ", $2)
                        sub(/^ /, "", $2)
                        sub(/ $/, "", $2)
                        print $2
                }'
}

# Take ONE snapshot of uptime so that all three figures belong to the same
# sample. LC_ALL=C forces "." as the decimal separator; with a decimal-comma
# locale the figures could not be told apart from the "," list separators.
UPTIME_LINE="$(LC_ALL=C uptime)"

# uptime reports the 1-, 5- and 15-minute load averages, in that order.
# The variable names are historical and kept because the rest of the script
# refers to them:
#   F5M  - first figure  (1 minute)
#   F10M - second figure (5 minutes)
#   F15M - third figure  (15 minutes)
read -r F5M F10M F15M <<< "$(parse_load_averages "$UPTIME_LINE")"
 
# Build the alert report and mail it when the load figure in F15M is above
# the configured limit, then remove the scratch file.
#
# Reads:  NOTIFY, EMAIL, SUBJECT, TEMPFILE and F15M, set earlier in the script.
# Writes: RESULT ("1" when the limit was crossed, "0" otherwise).

# If no scratch file was prepared earlier (for instance on an OS the setup
# section does not know), create one here; an empty $TEMPFILE would turn
# every redirection below into an error.
if [ -z "${TEMPFILE:-}" ]; then
        TEMPFILE="$(mktemp "${TMPDIR:-/tmp}/hugin.XXXXXX")" || exit 1
fi

# load_exceeds LOAD LIMIT
# Returns 0 when LOAD is numerically greater than LIMIT, 1 when it is not,
# and 2 when either argument is not a plain decimal number.
# awk performs the floating point comparison, so bc is not required.
load_exceeds() {
        local num_re='^[0-9]*[.]?[0-9]+$'
        [[ "$1" =~ $num_re && "$2" =~ $num_re ]] || return 2
        awk -v load="$1" -v limit="$2" 'BEGIN { exit !(load + 0 > limit + 0) }'
}

# report_open_files PROCESS_NAME
# For every PID that pidof returns for PROCESS_NAME, prints the command line,
# the matching lines of /proc/PID/limits and the number of open descriptors.
report_open_files() {
        local pid
        # Unquoted on purpose: pidof prints the PIDs separated by spaces.
        for pid in $(pidof "$1"); do
                # /proc/PID/cmdline separates arguments with NUL bytes; turn
                # them into spaces so the arguments are not glued together.
                tr '\0' ' ' < "/proc/$pid/cmdline"
                echo
                grep -E 'files|Limit' "/proc/$pid/limits"
                echo "Currently open files: $(ls -1 "/proc/$pid/fd" | wc -l)"
                echo
        done
}

# write_report
# Prints the complete mail body on stdout.
# keep the head short coz we may send it to a pager or as a short message (SMS)
write_report() {
        echo "Load average Crossed allowed limit $NOTIFY (is now $F15M)."
        echo "Hostname: $(hostname)"
        echo "Local Date & Time : $(date)"
        echo "----------------FREE-----------------------"
        free -mt
        echo "----------------IOSTAT---------------------"
        # input/output statistics to monitor whether problem is in NIC, RAM or HDD
        iostat
        echo "---------------NETWORK---------------------"
        # display network statistics (transfer rates in packets and kilobytes)
        sar -n DEV 1 3
        echo "---------------CPU-------------------------"
        # display the top CPU consumers (column 3 of ps aux)
        ps aux | sort -k3,3n | tail -n 5
        echo "---------------RAM-------------------------"
        # display the top memory consumers (column 4 of ps aux)
        ps aux | sort -k4,4n | tail -n 5
        echo "----------NUMBER of 80 port connections----"
        # how many connections to port 80 are there; the trailing whitespace
        # in the pattern keeps ports such as 8080 or 8034 from being counted
        netstat -an | grep -c ':80[[:space:]]'
        echo "---------------NGINX OPEN FILES------------"
        ulimit -n
        report_open_files nginx
        echo "---------------NETSTAT---------------------"
        # display open ports (server only)
        netstat -lnpt
        echo "---------------MYSQL OPEN FILES------------"
        ulimit -n
        report_open_files mysqld
        echo "---------------MYSQL-PROCESSLIST-----------"
        # The password goes through the MYSQL_PWD environment variable rather
        # than a -p argument, so it does not show up in the process list.
        # Set MYSQL_ROOT_PASSWORD in the environment or replace the placeholder.
        # NOTE(review): a client option file (~/.my.cnf) is the preferred way
        # to supply credentials - consider switching to it.
        MYSQL_PWD="${MYSQL_ROOT_PASSWORD:-MYSQLPASSWORD}" \
                mysqladmin -v processlist extended-status -u root
        echo "---------------TOP-------------------------"
        # full top listing
        top -b -n1
}

# Look if it crossed limit
# compare the limit with the load figure held in F15M
RESULT=0
load_exceeds "$F15M" "$NOTIFY"
case $? in
        0) RESULT=1 ;;
        1) RESULT=0 ;;
        *) echo "warning: cannot compare load '$F15M' with limit '$NOTIFY'" >&2 ;;
esac

# if so send an email
if [ "$RESULT" = "1" ]; then
        write_report >> "$TEMPFILE"
        mail -s "$SUBJECT" "$EMAIL" < "$TEMPFILE"
fi

# remove file
rm -f -- "$TEMPFILE"