
#!/bin/sh
# File	: cdot_chk_snap-mirror-vault_lag.sh
# By	: Maarten.deBoer@Atos.net, (090804) 131210
# Subject	: Check SnapMirror & Vault lags
#(0.2): Made .sh
#(0.3): Check sizes on both filers. Can differ due to usage of autosizing
#(0.4): Check also snapmirror.conf & MAILFILE added
#(0.5): Mod. get SRCFILER & snapmirror.conf from vfilers
#(0.6): Log is now 25 (iso 24) hr. USAGE & -f added
#(0.7): No size check of Broken-off SM's
#(0.8): Added --mailto
#(0.1): Copied from chk_snapmirrors.sh
#(0.2): Added --sm & --sv
#(0.3): Added vfilers (vfiler0 .. rest), LOGGER + CI
#(0.4): Added option snapmirror.enable=off
#(0.5): Added; get snapmirror.conf info at ERROR
#(0.6): Added SV-error status & SUBJECT, ERROR at SM
#(0.7): Mod. empty ERR-file, mod; grep to egrep, Added VFILER at SV-check, Add "cat ${ERR}|sort -u >> ${TMPMAIL}"
#(0.8),150622: Mod. DM adapted for cdot
#(0.9),230307	: Add SSHCMD
#
# Went to KSH (instead of SH) because of the calculations
# Check if SnapMirror lags are too long (> ${MAXLAG_HR} hours).
#
### Check also if volume size of both SnapMirrored volumes became not equal, 
### due to autosizing of one (source) of them # And mail the output

PGM="`basename $0|cut -d\. -f1`" 
VER="0.9"
TMP="/tmp/${PGM}.$$"
CSV="/tmp/${PGM}.csv"
LOG="${HOME}/log/${PGM}.log"
HOSTNAME="`hostname|cut -d\. -f1`"
CLUSTERS="${HOME}/etc/clusters"
CFILTER="[?]*"
SFILTER="nlnaf"
MAIL=""
MAILTO="maarten.deboer@atos.net"
MAXLOGSIZE=1024   # In K's
SSH="/usr/bin/ssh -n"
MAXLAG_HR="48"  
SMONLY=""
SVONLY=""
CI=${HOSTNAME}
LOGGER=""
MAXSMDISLAG=240
SUBJECT=":${HOSTNAME}: Check Snap"
SNAPTYPE="DP,XDP"


# File	: sshcmd.func
# VER="0.1", <node>-04
# VER="0.2", <node>-06
# VER="0.3", <node>-10
SSHCMD()
# 1: Filername 2:Command-string
# When issue with connection to cluster, try the nodes (-01 & -02)
# "There are no entries matching your query." => EC=255
# "no connection" is also EC=255
{
  TMPERR="/tmp/${PGM}.$$.tmperr"
  touch ${TMPERR}
  /usr/bin/ssh -n ${1} "${2}" 2> ${TMPERR}
  EC=${?}
  # Check if "ssh: connect to host 10.192.109.202 port 22: Connection refused" If so (EC2=0), the 2nd
  grep 'Connection refused' ${TMPERR}
  EC2=${?}
  if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
    sleep 1
    /usr/bin/ssh -n ${1}-05 "${2}" 2> ${TMPERR}
    EC=${?}
    grep 'Connection refused' ${TMPERR}
    EC2=${?}
    if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
      sleep 1
      /usr/bin/ssh -n ${1}-01 "${2}" 2> ${TMPERR}
      EC=${?}
      grep 'Connection refused' ${TMPERR}
      EC2=${?}
      if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
        sleep 1
        /usr/bin/ssh -n ${1}-06 "${2}" 2> ${TMPERR}
        EC=${?}
        grep 'Connection refused' ${TMPERR}
        EC2=${?}
        if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
          sleep 1
          /usr/bin/ssh -n ${1}-02 "${2}" 2> ${TMPERR}
          EC=${?}
          grep 'Connection refused' ${TMPERR}
          EC2=${?}
          if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
            sleep 1
            /usr/bin/ssh -n ${1}-07 "${2}" 2> ${TMPERR}
            EC=${?}
            grep 'Connection refused' ${TMPERR}
            EC2=${?}
            if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
              sleep 1
              /usr/bin/ssh -n ${1}-08 "${2}" 2> ${TMPERR}
              EC=${?}
              grep 'Connection refused' ${TMPERR}
              EC2=${?}
              if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                sleep 1
                /usr/bin/ssh -n ${1}-09 "${2}" 2> ${TMPERR}
                EC=${?}
                grep 'Connection refused' ${TMPERR}
                EC2=${?}
                if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                  sleep 1
                  /usr/bin/ssh -n ${1}-10 "${2}" 2> ${TMPERR}
                  EC=${?}
                  grep 'Connection refused' ${TMPERR}
                  EC2=${?}
                  if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                    sleep 1
                    /usr/bin/ssh -n ${1}-11 "${2}" 2> ${TMPERR}
                    EC=${?}
                    grep 'Connection refused' ${TMPERR}
                    EC2=${?}
                    if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                      sleep 1
                      /usr/bin/ssh -n ${1}-12 "${2}" 2> ${TMPERR}
                      EC=${?}
                      grep 'Connection refused' ${TMPERR}
                      EC2=${?}
                      if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                        echo  "`date` ${PGM} ERROR with communication to ${1}. Connection to -01 .. -10 failed too."|tee -a ${LOG} 
                      fi 
                    fi
                  fi 
                fi 
              fi 
            fi  
          fi  
        fi  
      fi  
    fi  
  fi  # 
  rm ${TMPERR}
}

USAGE()
{
  echo "Usage: ${PGM} [<options>]"
  echo "  Version: ${VER}"
  echo "  options:"
  echo "    -c             : Cluster filter (${CFILTER})"
  echo "    -s             : Svm filter (${SFILTER})"
#
  echo "    -h | --help    : this help"
  echo "    -m | --mail    : do send mail"
  echo "    --maxlag       : MAXimum snapmirror/vailt LAG in hours (${MAXLAG_HR})"
  echo "    --sm           : SnapMirror only"
  echo "    --sv           : SnapVault only"
  echo "    -v             : Verbose mode"
  echo "    -V             : Version"
  echo "    -x             : set -x"
  echo "    --mailto       : change MAILTO address & do send mail (${MAILTO})"
}
# Check options
while [ $# -gt 0 ]
  do
  case $1 in
    -c) CFILTER="${2}"; shift ;;
    -s) SFILTER="${2}"; shift ;;
    -h | --help) USAGE; exit 1 ;;
    -m | --mail) MAIL=1 ;;
    --maxlag) MAXLAG_HR="${2}"; shift ;;
    --sm) SMONLY="1" ;;
    --sv) SVONLY="1" ;;
    -V) echo "${PGM}: v${VER}"; exit 3 ;;
    --mailto) MAILTO="$2"; MAIL=1; shift ;;
    -x)  set -x ;;
    *)  echo "Option $1 not known."; USAGE; exit 1 ;;
  esac
    shift
done

if [ ${SMONLY} ]; then
 SNAPTYPE="DP"
 SUBJECT="${SUBJECT} SnapMirror "
fi
if [ ${SVONLY} ]; then
 SNAPTYPE="XDP"
 SUBJECT="${SUBJECT} SnapVault "
fi
SUBJECT="${SUBJECT} lags [${PGM} v${VER}]"

echo "`date` ${PGM} v${VER} started."|tee -a ${LOG}|tee -a ${TMP}.mail
echo ""|tee -a ${TMP}.mail
echo "CLUSTERS=${CLUSTERS}"|tee -a ${TMP}.mail
echo "CFILTER=${CFILTER}"|tee -a ${TMP}.mail
echo "SFILTER=${SFILTER}"|tee -a ${TMP}.mail
echo "HOSTNAME=${HOSTNAME}"|tee -a ${TMP}.mail
echo "LOGGER=${LOGGER}"|tee -a ${TMP}.mail
echo "LOG=${LOG}"|tee -a ${TMP}.mail
echo "MAIL=${MAIL}"|tee -a ${TMP}.mail
echo "MAILTO=${MAILTO}"|tee -a ${TMP}.mail
echo "MAXLAG_HR=${MAXLAG_HR}"|tee -a ${TMP}.mail
echo "SMONLY=${SMONLY}"|tee -a ${TMP}.mail
echo "SVONLY=${SVONLY}"|tee -a ${TMP}.mail
echo "SNAPTYPE=${SNAPTYPE}"|tee -a ${TMP}.mail
sleep 1

echo "  MAXLAG_HR=${MAXLAG_HR}hrs." >> ${LOG}

#  LOG rotating
touch ${LOG}
# Check & move LOG-file if longer then max.
LOGSIZE=`du -ka ${LOG} | cut -f1`
if [ ${LOGSIZE} -ge ${MAXLOGSIZE} ]; then
  mv ${LOG} ${LOG}.old
  touch ${LOG}
fi

if [ ! -f ${CLUSTERS} ]; then
  echo "  Etc-file ${CLUSTERS} NOT found. Exiting ..."|tee -a ${LOG}
  exit 3
fi  # ${FILERS}

echo "# cluster;vserver;source-path;destination-path;type;lag-time (- | > ${MAXLAG_HR}hr) ;" > ${TMP}

cat ${CLUSTERS}|grep -v ^#|grep "${CFILTER}"|while read CLUSTER
do
  echo "  CLUSTER=${CLUSTER}"
  SSHCMD ${CLUSTER} "set -showseparator \";\" ;vserver show -operational-state running -field vserver"|grep "${SFILTER}"|awk -F\; '{print $1}'|while read VSERVER
  do
    echo "    VSERVER=${VSERVER}"
    SSHCMD ${CLUSTER} "set -showseparator \";\" ; snapmirror show -vserver ${VSERVER} -status Idle -type ${SNAPTYPE} -field vserver,source-path,destination-path,type,lag-time"|grep ${VSERVER}|while read LINE
    do
# source-path;destination-path;vserver;lag-time;
      SRC_PATH=`echo ${LINE}|awk -F\; '{print $1}'`
      DEST_PATH=`echo ${LINE}|awk -F\; '{print $2}'`
      SNAP_TYPE=`echo ${LINE}|awk -F\; '{print $3}'`
      LAG_TIME_HR=`echo ${LINE}|awk -F\; '{print $5}'|cut -d\: -f1`
      if [ "${LAG_TIME_HR}" != "-" ]; then
        if [ ${LAG_TIME_HR} -gt ${MAXLAG_HR} ]; then
          echo "${CLUSTER};${VSERVER};${SRC_PATH};${DEST_PATH};${SNAP_TYPE};${LAG_TIME_HR};"|tee -a ${TMP}
        fi  # LAG_TIME_HR} -gt ${MAXLAG_HR
      else
        echo "${CLUSTER};${VSERVER};${SRC_PATH};${DEST_PATH};${SNAP_TYPE};${LAG_TIME_HR};"|tee -a ${TMP}
      fi  # LAG_TIME_HR}" != "-" 

   done  # LINE

  done  # VSERVER

done  # CLUSTER


if [ ${MAIL} ] && [ "${MAILTO}" != "" ]; then
  cp ${TMP} ${CSV}
  cat ${TMP}.mail | mailx -a ${CSV} -s "${SUBJECT}" "${MAILTO}"
  echo "  Mail (${SUBJECT}) had been send to ${MAILTO} ..." | tee -a ${LOG}
fi

echo "`date` ${PGM} v${VER} finished"| tee -a ${LOG}

# Cleanup
rm ${TMP} ${TMP}.mail
exit 0

