
#!/bin/ksh
# File  : cdot_set_qos_settings.sh
# By    : Maarten de Boer, 180116
# Subject       : Script to set QoS settings
# set -x
# ToDos:
# - Clean up unused QoS policy-groups, but NOT the 0-INF ones.
# - Check that the right policy-groups are defined (<SVM>_<class>_<size>).
#   If not: report an ERROR & mail.
#(0.4),180118	: Mod incident when no qos-policy-group at a volume
#(0.5),180118	: Added the --setqos option (--setqos) SETQOS=1 ;;)
#
PGM="`basename $0|cut -d\. -f1`"
VER="0.5"
TMP="/tmp/${PGM}.$$"
CLUSTERS="${HOME}/etc/clusters"
MAILTO="maarten.deboer@atos.net"
SSH="/usr/bin/ssh"
HOSTNAME="`hostname | cut -d\. -f1`"
LOG="${HOME}/log/${PGM}.log"
FILTER="[?]*"
MAIL=""
SETQOS=""

MONIDHDR="MAS.NL.1"
SDMCLASS="ZZ-Event.Storage.Storage"
ONTAPARCH="[cdot]"
NAGIOS=""
NAGIOSSEV="WARNING"
NAGIOSSEVNR="1"
NAGIOSPASSIVEDIR="/appl/dfm/nagios/PassiveCheck/DVL"
#NAGIOSPASSIVEDIR="/appl/dfm/nagios/PassiveCheck/PRD"

# PREFIX needed at 'vserver show'
PREFIX="nlnafsmss12"

# QoS -max-throughput 
MAXQTP="INF"
# Storage classes
# Diamond  |               : 5000 IOPS /TB (1-2ms)
# Platinum | First (FC)    : 1500 IOPS /TB (3-5 ms)
# Gold     | Business (BC) :  500 IOPS /TB (4-8 ms)
# Silver   | Economy (EC)  :  250 IOPS /TB (6-12 ms)
# Bronze   | Archive (AC)  :   25 IOPS /TB (8-28 ms)
# MAX_PT = Max percentage as MAX QoS
DIAMONT_TB=5000
DIAMONT_MAX_PT=150
PLATINUM_TB=1500
PLATINUM_MAX_PT=150
GOLD_TB=500
GOLD_MAX_PT=200
SILVER_TB=250
SILVER_MAX_PT=200
BRONZE_TB=25
BRONZE_MAX_PT=200

SSHCMD()
# 1: Filername 2:Command-string
# When issue with connection to cluster, try the nodes (-01 & -02)
# "There are no entries matching your query." => EC=255
# "no connection" is also EC=255
{
  TMPERR="/tmp/${PGM}.$$.err"
  touch ${TMPERR}
  /usr/bin/ssh -n ${1} "${2}" 2> ${TMPERR}
  EC=${?}
  # Check if "ssh: connect to host 10.192.109.202 port 22: Connection refused" If so (EC2=0), the 2nd
  grep 'Connection refused' ${TMPERR}
  EC2=${?}
  if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
    sleep 1
    /usr/bin/ssh -n ${1}-01 "${2}" 2> ${TMPERR}
    EC=${?}
    grep 'Connection refused' ${TMPERR}
    EC2=${?}
    if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
      sleep 1
      /usr/bin/ssh -n ${1}-02 "${2}" 2> ${TMPERR}
      EC=${?}
      grep 'Connection refused' ${TMPERR}
      EC2=${?}
      if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
        sleep 1
        /usr/bin/ssh -n ${1}-03 "${2}" 2> ${TMPERR}
        EC=${?}
        grep 'Connection refused' ${TMPERR}
        EC2=${?}
        if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
          sleep 1
          /usr/bin/ssh -n ${1}-04 "${2}" 2> ${TMPERR}
          EC=${?}
          grep 'Connection refused' ${TMPERR}
          EC2=${?}
          if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
            sleep 1
            /usr/bin/ssh -n ${1}-05 "${2}" 2> ${TMPERR}
            EC=${?}
            grep 'Connection refused' ${TMPERR}
            EC2=${?}
            if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
              sleep 1
              /usr/bin/ssh -n ${1}-06 "${2}" 2> ${TMPERR}
              EC=${?}
              grep 'Connection refused' ${TMPERR}
              EC2=${?}
              if [ ${EC} -ne 0 ] && [ ${EC2} -eq 0 ]; then
                echo  "`date` ${PGM} ERROR with communication to ${1}. Connection to -01 - -06 failed too."|tee -a ${LOG} 
              fi  # EC=0 & EC2=0
            fi  # -06
          fi  # -05
        fi  # -04
      fi  # -03
    fi  # -02
  fi  # -01
  rm ${TMPERR}
}


USAGE()
{
  echo "Usage: ${PGM} [<options>]"
  echo "  Version: ${VER}"
  echo "  options       :"
  echo "    -e|--etc    : Etc/clusters-file (${CLUSTERS})"
  echo "    -f          : Filter filername (${FILTER})"
  echo "    -h|--help   : this Help"
  echo "    -m|--mail   : do send Mail"
  echo "    -V          : show Version"
  echo "    -x          : set -x"
  echo "    --mailto    : change MAILTO address & do send mail (${MAILTO})"
  echo "    --setqos    : do SETQOS"
}
# Check options
while [ $# -gt 0 ]
  do
  case $1 in
    -e | --etc) CLUSTERS=$2; shift ;;
    -f) FILTER=$2; shift ;;
    -m | --mail) MAIL=1 ;;
    -n | --nagios) NAGIOS=1 ;;
    --mailto) MAILTO=$2; MAIL=1; shift ;;
    -h | --help) USAGE; exit 1 ;;
    -V) echo "${PGM}: v${VER}"; exit 3 ;;
    -x)  set -x ;;
    --setqos) SETQOS=1 ;;
    *)  echo "Option ${1} not known. Exiting..."; echo; USAGE; exit 1 ;;
  esac
    shift
done  # case

# MAIN

echo "`date` ${PGM} v${VER} started (CLUSTERS=${CLUSTERS},FILTER=${FILTER},MAIL=${MAIL},NAGIOS=${NAGIOS},PREFIX=${PREFIX})"|tee -a ${LOG}
touch ${TMP}

echo "#; " > ${TMP}

cat "${CLUSTERS}"|grep -v \^#|awk -F\; '{print $1}'|sort|grep "${FILTER}"|while read CLUSTER
do
#  echo "  ${CLUSTER} ..."
# Getting the nodes 1st
  SSHCMD ${CLUSTER} "vserver show -operational-state running"|grep ${PREFIX}|awk '{print $1}' > ${TMP}.2
  cat ${TMP}.2|while read VSERVER
  do
# The 1st line (without LF) can be :
#   ssh: connect to host nlnaf100-01 port 22: Connection refused
# The 2nd line can be:
#   nlnaf100-04 205 days 02:54
# So need the check this out
#echo "1st SSHCMD:"
    echo "  ${CLUSTER}/${VSERVER}..."
# Leave out SVM-root-vols (with "root" in the volume name)
    SSHCMD ${CLUSTER} "set -units GB -showseparator \";\"; vol show -vserver ${VSERVER} -field aggregate,qos-policy-group,total"|grep ${PREFIX}|grep -v root|while read LINE
    do
#      echo ${LINE}
      VOL=`echo ${LINE}|awk -F\; '{print $2}'`
      AGGR=`echo ${LINE}|awk -F\; '{print $3}'`
      TOTAL=`echo ${LINE}|awk -F\; '{print $4}'`
      QOSPOLGR=`echo ${LINE}|awk -F\; '{print $5}'`
      QOS_CLASS=`echo ${LINE}|awk -F\; '{print $5}'|cut -d\_ -f2`
      QOS_SIZE=`echo ${LINE}|awk -F\; '{print $5}'|cut -d\_ -f3`
      echo "  ${VSERVER}:${AGGR}/${VOL}=${TOTAL} | ${QOSPOLGR} (${QOS_CLASS}_${QOS_SIZE})"
 
      if [ "${QOSPOLGR}" = "-" ]; then
# NO QoS-policy set. Generate an incident.
        WARNINGSTR="No qos-policy-group set at volume ${CLUSTER}/${VSERVER}:${VOL}" 
        echo "  ${WARNINGSTR}"|tee -a ${LOG}
        if [ ${MAIL} ]; then
          echo "${WARNINGSTR}"|mailx -s "No QoS-policy set [${PGM} v${VER}]" ${MAILTO}
          echo "  ${WARNINGSTR}. Mailed to ${MAILTO}"|tee -a ${LOG}
        fi  # MAIL
        if [ ${NAGIOS} ]; then
          CI="${CLUSTER}"
# Check for cDOT-node. If not, then -PLF
          ANSW="`echo ${CI}|grep 'nlnaf1[0-9][0-9]-[0-9][0-9]'`"
          if [ "${ANSW}" != "" ]; then
            MONID="${MONIDHDR}.${CI}-PLF"
          fi
          DATI="`date +%Y-%m-%d-%H-%M-%S`"
          NAGIOSFILE="${NAGIOSPASSIVEDIR}/${DATI}"
          MSGTEXTSTR="CI=${CI} ${ONTAPARCH} ${WARNINGSTR} (${DATI}@${HOSTNAME}).|MONID=${MONID};CLASS=${SDMCLASS};"
          echo "${NAGIOSSEVNR}|${MSGTEXTSTR} (${DATI}).|MONID=${MONID};CLASS=${SDMCLASS};"|tee -a ${NAGIOSFILE} ${LOG}
          echo "  ${WARNINGSTR}. Send to NAGIOS (${NAGIOSFILE})"|tee -a ${LOG}
        fi  # NAGIOS

      fi  # ${QOSPOLGR}" = "-"

      if [ "${QOSPOLGR}" != "-" ] && ( [ "${QOS_SIZE}" = "" ] || [ "${TOTAL}" != "${QOS_SIZE}" ] ); then
# If a QoS-pol (not "-") and empty QoS-size / QoS <> vol-total-size
# Define a new name (QOS_NEWGROUP)
        QOS_NEWGROUP="${VSERVER}_${QOS_CLASS}_${TOTAL}"
        echo "  Size NOT set OR Vol-size NOT same as QoS-size. Should be set to ${QOS_NEWGROUP}"
        MAXQTP="INF"
# Calculate NEWGROUP
# let PERC="(${TTLCNT}-${WARNCNT})*100/${TTLCNT}"
# If MAXQTP > TB-value then set TB-value
        TSIZE=`echo ${TOTAL}|sed 's/GB//g'`
        case ${QOS_CLASS} in
          diamond) let MAXQTP="( ${TSIZE} * ${DIAMOND_TB} * (${DIAMOND_MAX_PT}/100) ) / 1024 "
            if [ ${MAXQTP} -lt ${DIAMOND_TB} ]; then
              MAXQTP=${DIAMOND_TB}
            fi
            ;;
          platinum) let MAXQTP="( ${TSIZE} * ${PLATINUM_TB} * (${PLATINUM_MAX_PT}/100) ) / 1024 " 
            if [ ${MAXQTP} -lt ${PLATINUM_TB} ]; then
              MAXQTP=${PLATINUM_TB}
            fi
            ;;
          gold) let MAXQTP="( ${TSIZE} * ${GOLD_TB} * (${GOLD_MAX_PT}/100) ) / 1024 "
            if [ ${MAXQTP} -lt ${GOLD_TB} ]; then
              MAXQTP=${GOLD_TB}
            fi
            ;;
          silver) let MAXQTP="( ${TSIZE} * ${SILVER_TB} * (${SILVER_MAX_PT}/100) ) / 1024 "
            if [ ${MAXQTP} -lt ${SILVER_TB} ]; then
              MAXQTP=${SILVER_TB}
            fi
            ;;
          bronze) let MAXQTP="( ${TSIZE} * ${BRONZE_TB} * (${BRONZE_MAX_PT}/100) ) / 1024 "
            if [ ${MAXQTP} -lt ${BRONZE_TB} ]; then
              MAXQTP=${BRONZE_TB}
            fi
            ;;
          *) MAXQTP="INF" ;;
        esac
        echo "  New QoS: ${VSERVER}:${VOL} QoS:${QOS_NEWGROUP} = ${MAXQTP} (IOPS)"| tee -a ${LOG}
# Check 1st if (new)policy-group exists
        CHK_NEWGROUP=`SSHCMD ${CLUSTER} "qos policy-group show -vserver ${VSERVER} -policy-group ${QOS_NEWGROUP} -field policy-group"|grep ${QOS_NEWGROUP}` 
        if [ "${CHK_NEWGROUP}" = "" ]; then
# NO NEW group, so create
          if [ ${SETQOS} ]; then
            SSHCMD ${CLUSTER} "qos policy-group create -vserver ${VSERVER} -policy-group ${QOS_NEWGROUP} -max-throughput ${MAXQTP}"
          fi
        else
          if [ ${SETQOS} ]; then
            SSHCMD ${CLUSTER} "qos policy-group modify -policy-group ${QOS_NEWGROUP} -max-throughput ${MAXQTP}"
          fi
        fi  # CHK_NEWGROUP
# Set NEW QoS-policy onto volume
        if [ ${SETQOS} ]; then
          SSHCMD ${CLUSTER} "volume modify -vserver ${VSERVER} -volume ${VOL} -qos-policy-group ${QOS_NEWGROUP}"
        fi
      else
# Nothing to be done
        echo "  OK"
      fi  # [ "${QOS_SIZE}" = "" ] || [ "${TOTAL}" != "${QOS_SIZE}" ]
    done  # vol show
  done  # cat ${TMP}.2
done  # CLUSTER


rm ${TMP} ${TMP}.2 
echo "`date` ${PGM} v${VER} finished."|tee -a ${LOG}
exit 0

