
#!/bin/sh
# File  : cdot_chk_environment.sh
# By    : Maarten de Boer, 170905
# Subject       : Script to check cDOR environments
# set -x
PGM="`basename $0|cut -d\. -f1`"
VER="0.1"
TMP="/tmp/${PGM}.$$"
CLUSTERS="${HOME}/etc/clusters"
MAILTO="maarten.deboer@atos.net"
SSH="/usr/bin/ssh"
HOSTNAME="`hostname | cut -d\. -f1`"
LOG="${HOME}/log/${PGM}.log"
ASC="${PGM}.asc"
FILTER="[?]*"
MAIL=""

USAGE()
{
  echo "Usage: ${PGM} [<options>]"
  echo "  Version: ${VER}"
  echo "  options       :"
  echo "    -e|--etc    : Etc/clusters-file (${CLUSTERS})"
  echo "    -f          : Filter filername (${FILTER})"
  echo "    -h|--help   : this Help"
  echo "    -m|--mail   : do send Mail"
  echo "    -V          : show Version"
  echo "    -x          : set -x"
  echo "    --mailto    : change MAILTO address & do send mail (${MAILTO})"
}
# Check options
while [ $# -gt 0 ]
  do
  case $1 in
    -e | --etc) FILERS=$2; shift ;;
    -f) FILTER=$2; shift ;;
    -m | --mail) MAIL=1 ;;
    --mailto) MAILTO=$2; MAIL=1; shift ;;
    -h | --help) USAGE; exit 1 ;;
    -V) echo "${PGM}: v${VER}"; exit 3 ;;
    -x)  set -x ;;
    *)  echo "Option ${1} not known. Exiting..."; echo; USAGE; exit 1 ;;
  esac
    shift
done  # case

SSHCMD()
# 1: Filername 2:Command-string
# When issue with connection to cluster, try the nodes (-01 & -02)
# "There are no entries matching your query." => EC=255
# "no connection" is also EC=255
{
  TMPERR="/tmp/${PGM}.$$.err"
  /usr/bin/ssh -n ${1} "${2}" 2> ${TMPERR}
  EC=${?}
  # Check if "ssh: connect to host 10.192.109.202 port 22: Connection refused" If so (EC2=0), the 2nd
  grep 'Connection refused' ${TMPERR}
  EC2=${?}
  if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
    sleep 1
    /usr/bin/ssh -n ${1}-06 "${2}" 2> ${TMPERR}
    EC=${?}
    grep 'Connection refused' ${TMPERR}
    EC2=${?}
    if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
      sleep 1
      /usr/bin/ssh -n ${1}-05 "${2}" 2> ${TMPERR}
      EC=${?}
      grep 'Connection refused' ${TMPERR}
      EC2=${?}
      if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
        sleep 1
        /usr/bin/ssh -n ${1}-04 "${2}" 2> ${TMPERR}
        EC=${?}
        grep 'Connection refused' ${TMPERR}
        EC2=${?}
        if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
          sleep 1
          /usr/bin/ssh -n ${1}-03 "${2}" 2> ${TMPERR}
          EC=${?}
          grep 'Connection refused' ${TMPERR}
          EC2=${?}
          if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
            sleep 1
            /usr/bin/ssh -n ${1}-02 "${2}" 2> ${TMPERR}
            EC=${?}
            grep 'Connection refused' ${TMPERR}
            EC2=${?}

            if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
              sleep 1
              /usr/bin/ssh -n ${1}-01 "${2}" 2> ${TMPERR}
              EC=${?}
              grep 'Connection refused' ${TMPERR}
              EC2=${?}

              if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                echo  "`date` ${PGM} ERROR with communication to ${1}. Connection to -01 - -06 failed too."|tee -a ${LOG} 
              fi  # EC=0 & EC2=0

            fi  # -01
          fi  # -02
        fi  # -03
      fi  # -04
    fi  # -05
  fi  # -06
  rm ${TMPERR}
}


echo "`date` ${PGM} v${VER} started"|tee -a ${LOG}
touch ${TMP} /tmp/${ASC}

echo "" > ${TMP}
cat "${CLUSTERS}"|grep -v \^#|awk -F\; '{print $1}'|sort|grep "${FILTER}"|while read CLUSTER
do
  echo "  ${CLUSTER}: Getting fault info ..."
  SSHCMD ${CLUSTER} "system environment sensors show -field state,type,node,name -state fault"
  SSHCMD ${CLUSTER} "storage shelf show -field psu-id,psu-op-status,error-type,op-status,error-sev"|grep ${CLUSTER}|grep -v normal

done  #  CLUSTER


if [ ${MAIL} ]; then
  cp ${TMP} /tmp/${ASC}

  echo "`date` mailed to ${MAILTO} "|tee -a ${LOG}
fi  # MAIL

rm ${TMP} /tmp/${ASC}
echo "`date` ${PGM} v${VER} finished."|tee -a ${LOG}
exit 0

