#!/bin/bash
#
#   Copyright (C) 2017 Rackspace, Inc.
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License along
#   with this program; if not, write to the Free Software Foundation, Inc.,
#   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#
#   Package name:   recap
#   Author:         Jacob Walcik
#                   Carl Thompson
#                   David King
#
#   Maintainer:     Brent Oswald
#                   Benjamin Graham
#                   Simone Soldateschi
#
#   Homepage:   https://github.com/rackerlabs/recap/
#
#
# The cron execution file is in /etc/cron.d
#
# The variable configuration file for recap is in /etc/recap
#
# All the variables defined here and several others can be defined in the variable
# configuration file.  You can see more on these in /usr/share/doc/recap-{version}/README
#
# Define the default environment for recap
# DO NOT change these values here, user-modifiable options can be tweaked through
# /etc/recap

## Version
readonly _VERSION='1.1.0'

## Default settings (can *NOT* be overridden in config file)
# Timestamp written as the first line of every report file.
DATE=$( date +%F_%T )
# Lock file that keeps two recap runs from overlapping.
LOCKFILE="/var/lock/recap.lock"
# Timestamp embedded in the name of every report file.
LOG_SUFFIX=$( date +%Y%m%d-%H%M%S )
readonly DATE LOCKFILE LOG_SUFFIX
# Command lookup is restricted to the system directories.
PATH=/bin:/usr/bin:/sbin:/usr/sbin

## Default settings(modified through command line arguments)
# BACKUP is set to "yes" by -B/--backup, SNAPSHOT by -S/--snapshot.
BACKUP="no"
SNAPSHOT="no"

## Default settings(can be overridden in config file)
# Report names used when copying backups and when building the e-mail.
BACKUP_ITEMS="fdisk mysql netstat ps pstree resources"
# Directory that receives the report files (backups/ and snapshots/ live
# below it).
BASEDIR="/var/log/recap"
# When non-empty, the generated reports are mailed to this address.
MAILTO=""
# recap quits when the current load average is higher than this value.
# NOTE(review): this is 10 x the number of /proc/cpuinfo lines containing
# "sibling"; if that field is absent the count is 0 and MAXLOAD becomes 0 --
# confirm this is acceptable on all supported platforms.
MAXLOAD=$(( $( grep -c sibling /proc/cpuinfo ) * 10 ))
# Minimum free space required in BASEDIR (compared against "df -BM");
# 0 disables the check.
MIN_FREE_SPACE=0
USEFDISK="no"
USEPS="yes"
USEPSTREE="no"
USESLAB="no"

# Parent setting(other settings depend on this)
USERESOURCES="yes"
# These depend on USERESOURCES to be enabled("yes")
USEDF="yes"
USEFULLSTATUS="no"
USESAR="yes"
USESARQ="no"
USESARR="no"

# Parent setting(other settings depend on this)
USENETSTAT="yes"
# These depend on USENETSTAT to be enabled("yes")
USENETSTATSUM="no"

# Parent setting(other settings depend on this)
USEMYSQL="no"
# These depend on USEMYSQL to be enabled("yes")
# Option file handed to mysql/mysqladmin via --defaults-extra-file.
DOTMYDOTCNF="/root/.my.cnf"
# Process list layout: "table" or "vertical".
MYSQL_PROCESS_LIST="table"
USEINNODB="no"
USEMYSQLPROCESSLIST="no"

# Default command options(can be overridden in config file)
# Each OPTS_* value is expanded unquoted by the matching print_* function,
# so it is split on whitespace into separate command-line options.
OPTS_CURL="-Ls"
OPTS_DF="-x nfs"
OPTS_FDISK="-l"
OPTS_FREE=""
OPTS_IOSTAT="-t -x 1 3"
OPTS_IOTOP="-b -o -t -n 3"
OPTS_NETSTAT="-ntulpae"
OPTS_NETSTAT_SUM="-s"
OPTS_PS="auxfww"
OPTS_PSTREE="-p"
OPTS_VMSTAT="-S M 1 3"
# URL fetched with curl for the web server status report.
OPTS_STATUSURL="http://localhost:80/"

# Functions

# Remove the lock file; recaplock installs this as its trap handler.
# Globals: LOCKFILE (read)
cleanup() { rm -f "${LOCKFILE}"; }

# Create a Lock so that recap does not try to run over itself.
recaplock() {
  (set -C; echo "$$" > "${LOCKFILE}") 2>/dev/null
  if [[ $? -ne 0 ]]; then
    echo "ERROR $( basename $0 ) ($$): Lock File exists - exiting"
    exit 1
  else
    trap 'cleanup; exit 2' 1 2 15 17 19 23
    trap 'cleanup' EXIT
  fi
}

# Ensure our output directories exist before we start creating files.
# Globals:  BASEDIR (read), BACKUPDIR (written), SNAPSHOTSDIR (written)
# Creates BASEDIR, BASEDIR/backups and BASEDIR/snapshots when missing.
create_output_dirs() {
  local output_dir
  BACKUPDIR="${BASEDIR}/backups"
  SNAPSHOTSDIR="${BASEDIR}/snapshots"
  # Every path is quoted so a BASEDIR containing whitespace stays one word.
  for output_dir in "${BASEDIR}" "${BACKUPDIR}" "${SNAPSHOTSDIR}"; do
    if [[ ! -d "${output_dir}" ]]; then
      mkdir -p "${output_dir}"
    fi
  done
}

# Write the command-line help text to stdout, one line per argument.
print_usage() {
  printf '%s\n' \
    "Usage: recap [OPTION]" \
    "Takes a snapshot of running processes and resource usage" \
    " " \
    "Optional flags: " \
    "-h, --help     print this help output" \
    "-B, --backup   copy the last log file set to '/var/log/recap/backups'" \
    "-S, --snapshot take a timestamped snapshot outside of the regular output file rotation" \
    "-V, --version  print version and exit"
}

# Create (or truncate) a report file and write the run timestamp into it.
# Arguments: $1 - path of the report file
# Globals:   DATE (read)
# Exits:     1 when the target path is a directory
create_output_file() {
  local OUTPUT_FILE="$1"
  if [[ -d "${OUTPUT_FILE}" ]]; then
    echo "Target file already exists: ${OUTPUT_FILE}"
    # Explicit status: a bare `exit` would reuse echo's status and report
    # success even though no report can be written.
    exit 1
  fi
  # The timestamp becomes the first line of the report.
  echo "${DATE}" > "${OUTPUT_FILE}"
}

# Verify that a report file exists and is readable.
# Arguments: $1 - path of the report file
# Exits:     1 when the file cannot be read
check_output_file() {
  local OUTPUT_FILE="$1"
  if [[ ! -r "${OUTPUT_FILE}" ]]; then
    echo "The output file does not exist: ${OUTPUT_FILE}"
    # Explicit status: a bare `exit` would reuse echo's status (0).
    exit 1
  fi
}

# Append the process listing ("ps" with OPTS_PS) to the given file.
# Arguments: $1 - report file
print_ps() {
  local report_file="$1"
  # OPTS_PS is intentionally unquoted so it splits into separate options.
  ps ${OPTS_PS} >> "${report_file}"
}

# Append the process tree ("pstree" with OPTS_PSTREE) to the given file.
# Arguments: $1 - report file
print_pstree() {
  local report_file="$1"
  # OPTS_PSTREE is intentionally unquoted so it splits into separate options.
  pstree ${OPTS_PSTREE} >> "${report_file}"
}

# Append a separator line (a single space) to the given file.
# Arguments: $1 - report file
print_blankline() {
  local report_file="$1"
  printf ' \n' >> "${report_file}"
}

# Append an "UPTIME report" heading followed by the output of uptime.
# Arguments: $1 - report file
print_uptime() {
  local report_file="$1"
  {
    echo "UPTIME report"
    uptime
  } >> "${report_file}"
}

# Append a "FREE report" heading followed by the output of free.
# Arguments: $1 - report file
print_free() {
  local report_file="$1"
  {
    echo "FREE report"
    # Unquoted on purpose: OPTS_FREE may hold zero or more options.
    free ${OPTS_FREE}
  } >> "${report_file}"
}

# Append a "VMSTAT report" heading followed by the output of vmstat.
# Arguments: $1 - report file
print_vmstat() {
  local report_file="$1"
  {
    echo "VMSTAT report"
    # Unquoted on purpose: OPTS_VMSTAT holds several options.
    vmstat ${OPTS_VMSTAT}
  } >> "${report_file}"
}

# Append an "IOSTAT report" heading followed by the output of iostat.
# Arguments: $1 - report file
print_iostat() {
  local report_file="$1"
  {
    echo "IOSTAT report"
    # Unquoted on purpose: OPTS_IOSTAT holds several options.
    iostat ${OPTS_IOSTAT}
  } >> "${report_file}"
}

# Append an "IOTOP report" heading followed by the output of iotop.
# Arguments: $1 - report file
print_iotop() {
  local report_file="$1"
  {
    echo "IOTOP report"
    # Unquoted on purpose: OPTS_IOTOP holds several options.
    iotop ${OPTS_IOTOP}
  } >> "${report_file}"
}

# Append a sar report to the given file.
# Arguments: $1 - report file
#            $2 - optional report selector: "r" runs "sar -r", "q" runs
#                 "sar -q"; anything else (or nothing) runs plain "sar"
print_sar() {
  local LOGFILE="$1"
  local OPTION="${2-}"
  # An array keeps "no flag" as zero arguments; a quoted empty string would
  # reach sar as an empty argument.
  local -a flags=()
  case "${OPTION}" in
    r|q) flags=( "-${OPTION}" ) ;;
  esac

  echo "SAR${flags[*]} report" >> "${LOGFILE}"
  sar "${flags[@]}" >> "${LOGFILE}"
}

# Append a "Web Status report" heading and the body fetched by curl from
# OPTS_STATUSURL; curl's stderr is discarded.
# Arguments: $1 - report file
print_http_fullstatus() {
  local report_file="$1"
  {
    echo "Web Status report"
    # OPTS_CURL is unquoted on purpose so it splits into separate options.
    curl ${OPTS_CURL} "${OPTS_STATUSURL}" 2>/dev/null
  } >> "${report_file}"
}

# Append a "Network connections" heading followed by netstat output
# (options from OPTS_NETSTAT).
# Arguments: $1 - report file
print_netstat() {
  local report_file="$1"
  {
    echo "Network connections"
    netstat ${OPTS_NETSTAT}
  } >> "${report_file}"
}

# Append a "Network traffic summary" heading followed by netstat output
# (options from OPTS_NETSTAT_SUM).
# Arguments: $1 - report file
print_netstat_sum() {
  local report_file="$1"
  {
    echo "Network traffic summary"
    netstat ${OPTS_NETSTAT_SUM}
  } >> "${report_file}"
}

# Append the output of "mysqladmin status" to the given file.
# Arguments: $1 - report file
# Globals:   DOTMYDOTCNF (read)
# When the Plesk password file is readable the "admin" account with that
# password is used; otherwise credentials come from DOTMYDOTCNF.
print_mysql() {
  local LOGFILE="$1"
  local PLESK_FILE="/etc/psa/.psa.shadow"
  if [[ -r "${PLESK_FILE}" ]]; then
    echo "MySQL status" >> "${LOGFILE}"
    # The password is quoted so whitespace or glob characters in it cannot
    # split or expand into extra arguments.
    mysqladmin -uadmin -p"$( cat "${PLESK_FILE}" )" status >> "${LOGFILE}"
  else
    echo "MySQL ${DOTMYDOTCNF} status" >> "${LOGFILE}"
    mysqladmin --defaults-extra-file="${DOTMYDOTCNF}" status >> "${LOGFILE}"
  fi
  print_blankline "${LOGFILE}"
}

# Print the non-truncated InnoDB status to the mysql file.
# Arguments: $1 - report file
# Globals:   DOTMYDOTCNF (read)
# Asks the server for its pid_file and tmpdir, then looks through the
# server's open file descriptors for the InnoDB status temp file and copies
# it into the report.  Only the heading is written when the server's
# pid_file or tmpdir cannot be determined.
print_mysql_innodb_status() {
  local LOGFILE="$1"
  # Everything is local: in particular the caller's TMPDIR environment
  # variable must not be unset or overwritten.
  local -a myvals=()
  local pid_file="" mysql_tmpdir="" mysql_pid="" fd_path=""

  echo "MySQL InnoDB status"  >> "${LOGFILE}"

  # The output of "SHOW ENGINE INNODB STATUS" itself is thrown away (head
  # keeps only the two variable rows) because it is likely truncated; the
  # full text is read from the server's temp file below.
  myvals=( $( mysql --defaults-extra-file="${DOTMYDOTCNF}" \
                    -Bse "SHOW VARIABLES LIKE 'pid_file'; \
                          SHOW VARIABLES LIKE 'tmpdir'; \
                          SHOW ENGINE INNODB STATUS;" \
                    2>/dev/null \
                | head -n 2 ) )
  # Rows are "name<TAB>value", so values sit at indexes 1 and 3; strip any
  # trailing slash.
  pid_file="${myvals[1]%/}"
  mysql_tmpdir="${myvals[3]%/}"

  # Without a readable pid file there is no process to inspect.  Stop here
  # rather than running cat with no operand, which would read stdin.
  if [[ -z "${pid_file}" || ! -r "${pid_file}" || -z "${mysql_tmpdir}" ]]; then
    return 0
  fi
  mysql_pid="$( cat "${pid_file}" )"
  mysql_pid="${mysql_pid//[[:space:]]/}"
  if [[ ! "${mysql_pid}" =~ ^[0-9]+$ ]]; then
    return 0
  fi

  # Next, we walk the descriptors the server holds open:
  for fd_path in "/proc/${mysql_pid}/fd/"*; do
    # InnoDB's temporary files live in the mysql tmpdir and their names
    # start with 'ib', so compare the canonicalized descriptor target to
    # the tmpdir/ib* pattern:
    if [[ "$( readlink -f "${fd_path}" )" == "${mysql_tmpdir}"/ib* ]]; then
      # The status file is the one whose first bytes are "====":
      head -c8 "${fd_path}" | grep -q '^====' && cat "${fd_path}"  >> "${LOGFILE}"
    fi
  done
}

# Append the MySQL process list to the given file.
# Arguments: $1 - report file
# Globals:   DOTMYDOTCNF, MYSQL_PROCESS_LIST (read)
# MYSQL_PROCESS_LIST selects the layout, case-insensitively: "table" uses
# "mysqladmin -v processlist", "vertical" uses "show full processlist\G".
# Any other value produces only the heading.
print_mysql_procs() {
  local LOGFILE="$1"
  local PLESK_FILE="/etc/psa/.psa.shadow"
  # Lower-cased once so both credential sources accept the same spellings.
  local list_format="${MYSQL_PROCESS_LIST,,}"
  local -a auth=()

  if [[ -r "${PLESK_FILE}" ]]; then
    echo "MySQL processes" >> "${LOGFILE}"
    # Password kept inside one array element so it cannot word-split.
    auth=( -uadmin "-p$( cat "${PLESK_FILE}" )" )
  else
    echo "MySQL ${DOTMYDOTCNF} processes" >> "${LOGFILE}"
    auth=( "--defaults-extra-file=${DOTMYDOTCNF}" )
  fi

  case "${list_format}" in
    table)
      mysqladmin "${auth[@]}" -v processlist >> "${LOGFILE}"
      ;;
    vertical)
      mysql "${auth[@]}" -e "show full processlist\G" >> "${LOGFILE}"
      ;;
  esac
  print_blankline "${LOGFILE}"
}

# Print the top 10 processes (by cpu usage) to the defined file.
# Arguments: $1 - report file
# Uses the "Average:" rows of "pidstat -l 2 2", sorted on the %CPU column.
print_top_10_cpu() {
  local LOGFILE="$1"
  # pidstat newer than 10.1.4 added a UID field, which moves %CPU from
  # field 6 to field 7.
  local pidstat_cpufield=7
  local pidstat_version oldest_version
  pidstat_version="$( pidstat -V 2>&1 |
                        awk '{if(NR==1){print $NF}}' )"
  # The installed version is <= 10.1.4 exactly when it sorts first (or ties)
  # against 10.1.4.  Comparing single values avoids the trailing separator a
  # re-joined sorted list would carry.
  oldest_version="$( printf '%s\n' "${pidstat_version}" "10.1.4" |
                       sort --version-sort | head -n 1 )"
  if [[ -n "${pidstat_version}" &&
        "${oldest_version}" == "${pidstat_version}" ]]; then
    pidstat_cpufield=6
  fi
  echo "Top 10 cpu using processes" >> "${LOGFILE}"
  # capture header
  pidstat | sed -n '3p' >> "${LOGFILE}"
  # The row containing '%system' is the column header of the Average block.
  pidstat -l 2 2 | grep -v '%system' |
    grep -E '^Average:' | sort -nr -k "${pidstat_cpufield}" |
    head -11 >> "${LOGFILE}"
}

# Append a heading and the first 11 lines of "ps auxww --sort=-rss" (the ps
# header plus the ten largest processes by resident memory).
# Arguments: $1 - report file
print_top_10_mem() {
  local report_file="$1"
  {
    echo "Top 10 memory using processes"
    ps auxww --sort=-rss | head -11
  } >> "${report_file}"
}

# Append a "Disk Utilization" heading followed by df output (options from
# OPTS_DF).
# Arguments: $1 - report file
print_df() {
  local report_file="$1"
  {
    echo "Disk Utilization"
    df ${OPTS_DF}
  } >> "${report_file}"
}

# Print the disk partitions to the fdisk file.
# Arguments: $1 - report file
# Both stdout and stderr of fdisk (options from OPTS_FDISK) are recorded.
print_fdisk() {
  local LOGFILE="$1"
  echo "Disk partitions" >> "${LOGFILE}"
  # Redirections apply left to right: stdout must point at the file before
  # stderr is duplicated onto it, otherwise stderr stays on the terminal.
  fdisk ${OPTS_FDISK} >> "${LOGFILE}" 2>&1
}

# Append a slab cache summary built from /proc/slabinfo.
# Arguments: $1 - report file
# One row per cache whose (third column x fourth column) product is
# non-zero; the fifth output column is that product divided by 1048576 and
# rows are sorted on it, largest first.
print_slabinfo() {
  local report_file="$1"
  {
    echo "Slab Information"
    printf "name\tactive\tnum_obj\tobj_size\thuh\n"
    # The assignment doubles as the awk pattern: rows with a zero product
    # (including the column-header row) are skipped.
    tail -n+2 /proc/slabinfo |
      awk 'size=$3*$4 {print $1"\t"$2"\t"$3"\t"$4"\t"size/1048576}' |
      sort -k5gr
  } >> "${report_file}"
}

# Send the report via email.
# Globals:  BACKUP_ITEMS, BASEDIR, LOG_SUFFIX, HOSTNAME, MAILTO and the
#           USE<ITEM> switches (all read)
# Returns:  1 when the temporary directory cannot be created
# Concatenates this run's report files (only for items whose USE<ITEM>
# switch is "yes") into one temporary file and mails it to MAILTO.
send_mail() {
  local TEMPDIR REPORT item
  local ITEM_FILE=""
  local ITEM_USE=""

  # Declared separately from the assignment so a mktemp failure is not
  # hidden by the exit status of `local`.
  TEMPDIR="$( mktemp -d "/tmp/rs.XXXXXXXXXX" )" || return 1
  REPORT="${TEMPDIR}/report.log"

  # echo report files into temporary file for mailing
  for item in ${BACKUP_ITEMS}; do
    ITEM_FILE="${BASEDIR}/${item}_${LOG_SUFFIX}.log"
    # Build the variable to use and then use indirect ref to obtain its value
    ITEM_USE="USE${item^^}"
    if [[ -r "${ITEM_FILE}" && "${!ITEM_USE,,}" == "yes" ]]; then
      {
        echo "-----BEGIN ${item^^} REPORT-----"
        cat "${ITEM_FILE}"
        echo "-----END ${item^^} REPORT-----"
        echo " "
      } >> "${REPORT}"
    fi
  done

  # send email summary to MAILTO address
  if [[ -r "${REPORT}" ]]; then
    mail -s "System Monitor Report - ${HOSTNAME}" "${MAILTO}" < "${REPORT}"
  fi

  # Remove the temporary files even when there was nothing to send.
  rm -rf -- "${TEMPDIR}"
}

# Manage item report.
# Arguments: $1 - report name; the function print_<name> produces the body
# Globals:   BASEDIR, SNAPSHOTSDIR, LOG_SUFFIX, SNAPSHOT (read)
# The report goes to BASEDIR, or to SNAPSHOTSDIR with a ".log_snapshot"
# suffix when SNAPSHOT is "yes".
run_item_report() {
  local item="$1"
  local ITEM_FILE="${BASEDIR}/${item}_${LOG_SUFFIX}.log"

  if [[ "${SNAPSHOT,,}" == "yes" ]]; then
    ITEM_FILE="${SNAPSHOTSDIR}/${item}_${LOG_SUFFIX}.log_snapshot"
  fi

  create_output_file "${ITEM_FILE}"
  check_output_file "${ITEM_FILE}"
  # Called directly rather than through eval, which would re-parse the file
  # path and split it on whitespace.
  "print_${item}" "${ITEM_FILE}"
}

# Build the "resources" report.
# Globals: BASEDIR, SNAPSHOTSDIR, LOG_SUFFIX, SNAPSHOT, USESAR, USESARR,
#          USESARQ, USEFULLSTATUS, USEDF, USESLAB (all read)
# The always-on sections come first, then each optional section that is
# switched on, and finally the top-10 CPU and memory lists.
run_resources_report() {
  local item="resources"
  local report_file="${BASEDIR}/${item}_${LOG_SUFFIX}.log"
  local section

  # Snapshots live in their own directory with a distinct suffix.
  if [[ "${SNAPSHOT,,}" == "yes" ]]; then
    report_file="${SNAPSHOTSDIR}/${item}_${LOG_SUFFIX}.log_snapshot"
  fi

  create_output_file "${report_file}"
  check_output_file "${report_file}"

  # Unconditional sections, each followed by a separator line.
  for section in uptime free vmstat iostat iotop; do
    "print_${section}" "${report_file}"
    print_blankline "${report_file}"
  done

  # plain sar
  if [[ "${USESAR,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_sar "${report_file}"
  fi

  # sar -r
  if [[ "${USESARR,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_sar "${report_file}" "r"
  fi

  # sar -q
  if [[ "${USESARQ,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_sar "${report_file}" "q"
  fi

  # web server status page, only when curl is installed
  if type -p 'curl' > /dev/null && [[ "${USEFULLSTATUS,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_http_fullstatus "${report_file}"
  fi

  # disk utilization
  if [[ "${USEDF,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_df "${report_file}"
  fi

  # slab cache summary
  if [[ "${USESLAB,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_slabinfo "${report_file}"
  fi

  print_blankline "${report_file}"
  print_top_10_cpu "${report_file}"
  print_blankline "${report_file}"
  print_top_10_mem "${report_file}"
}

# Build the "netstat" report: the connection list, plus the traffic summary
# when USENETSTATSUM is "yes".
# Globals: BASEDIR, SNAPSHOTSDIR, LOG_SUFFIX, SNAPSHOT, USENETSTATSUM (read)
run_netstat_report() {
  local item="netstat"
  local report_file

  # Snapshots live in their own directory with a distinct suffix.
  case "${SNAPSHOT,,}" in
    yes) report_file="${SNAPSHOTSDIR}/${item}_${LOG_SUFFIX}.log_snapshot" ;;
    *)   report_file="${BASEDIR}/${item}_${LOG_SUFFIX}.log" ;;
  esac

  create_output_file "${report_file}"
  check_output_file "${report_file}"
  print_netstat "${report_file}"

  # optional netstat summary
  if [[ "${USENETSTATSUM,,}" == "yes" ]]; then
    print_blankline "${report_file}"
    print_netstat_sum "${report_file}"
  fi
}

# Build the "mysql" report: server status, plus the process list and the
# InnoDB status when enabled.
# Globals: BASEDIR, SNAPSHOTSDIR, LOG_SUFFIX, SNAPSHOT, DOTMYDOTCNF,
#          USEMYSQLPROCESSLIST, USEINNODB (all read)
# When DOTMYDOTCNF is unreadable the report holds only an error line.
run_mysql_report() {
  local item="mysql"
  local report_file

  # Snapshots live in their own directory with a distinct suffix.
  case "${SNAPSHOT,,}" in
    yes) report_file="${SNAPSHOTSDIR}/${item}_${LOG_SUFFIX}.log_snapshot" ;;
    *)   report_file="${BASEDIR}/${item}_${LOG_SUFFIX}.log" ;;
  esac

  create_output_file "${report_file}"
  check_output_file "${report_file}"

  if [[ -r "${DOTMYDOTCNF}" ]]; then
    print_mysql "${report_file}"

    # optional process list
    if [[ "${USEMYSQLPROCESSLIST,,}" == "yes" ]]; then
      print_blankline "${report_file}"
      print_mysql_procs "${report_file}"
    fi
    # optional InnoDB status
    if [[ "${USEINNODB,,}" == "yes" ]]; then
      print_blankline "${report_file}"
      print_mysql_innodb_status "${report_file}"
    fi
  else
    # Don't run reports if we can't read the config file
    echo "Error when attempting to read: ${DOTMYDOTCNF}, please check the" \
         "DOTMYDOTCNF setting in the configuration file" >> "${report_file}"
  fi
}

# Copy the last log file set to ${BACKUPDIR}.
# Globals: BACKUP_ITEMS, BASEDIR, BACKUPDIR (read)
# For each item the most recently modified BASEDIR/<item>_*.log is copied;
# items with no log file are skipped.
create_backup() {
  local log_file candidate newest
  for log_file in ${BACKUP_ITEMS}; do
    newest=""
    # Glob instead of parsing `ls`, so paths with whitespace stay intact.
    for candidate in "${BASEDIR}/${log_file}"_*.log; do
      # An unmatched glob stays literal; skip anything that is not a file.
      [[ -f "${candidate}" ]] || continue
      # Keep the candidate unless the current pick is strictly newer.
      if [[ -z "${newest}" || ! "${newest}" -nt "${candidate}" ]]; then
        newest="${candidate}"
      fi
    done
    if [[ -n "${newest}" ]]; then
      cp -- "${newest}" "${BACKUPDIR}"
    fi
  done
}

# Check enough disk space is free.
# Globals: MIN_FREE_SPACE, BASEDIR (read); FREE_SPACE (written)
# Returns: 0 when the check is disabled (MIN_FREE_SPACE is 0), passes, or
#          the free space cannot be determined
# Exits:   1 when the available space (in the units of "df -BM") is not
#          greater than MIN_FREE_SPACE
check_disk_space() {
  local probe_dir="${BASEDIR}"

  if [[ "${MIN_FREE_SPACE}" -eq 0 ]]; then
    return 0
  fi

  # BASEDIR may not exist yet, so measure the nearest existing ancestor
  # instead of letting df fail on a missing path.
  while [[ ! -e "${probe_dir}" && "${probe_dir}" == */* ]]; do
    probe_dir="${probe_dir%/*}"
  done
  if [[ -z "${probe_dir}" ]]; then
    probe_dir="/"
  elif [[ ! -e "${probe_dir}" ]]; then
    probe_dir="."
  fi

  FREE_SPACE=$( df -BM --output=avail "${probe_dir}" | tail -1 )
  # df pads the value and appends the unit, e.g. "  1234M".
  FREE_SPACE=${FREE_SPACE//[[:space:]]/}
  FREE_SPACE=${FREE_SPACE%M}
  if [[ ! "${FREE_SPACE}" =~ ^[0-9]+$ ]]; then
    # Best effort: an unusable df result must not block the run.
    echo "Unable to determine free disk space for ${BASEDIR}" >&2
    return 0
  fi

  if [[ "${MIN_FREE_SPACE}" -ge "${FREE_SPACE}" ]]; then
    echo "Unable to run recap due to not enough disk space" >&2
    # Stop the run; only printing the message would let recap keep writing.
    exit 1
  fi
  return 0
}

## Main

# Locate and load the configuration file.
# The old location (/etc/sysconfig/recap) wins whenever it is readable, with
# a notice asking the user to move to /etc/recap; otherwise /etc/recap is
# read, and without either file the built-in defaults stay in effect.
if [[ -r /etc/sysconfig/recap ]]; then
  if [[ -r /etc/recap ]]; then
    echo "Configuration files exist at old (/etc/sysconfig/recap) and new locations (/etc/recap). The file from the old location will be read. Please consolidate your configuration details into /etc/recap."
  else
    echo "Configuration file exists at old location (/etc/sysconfig/recap). The file will be read. Please move your configuration file to /etc/recap."
  fi
  source /etc/sysconfig/recap
elif [[ -r /etc/recap ]]; then
  source /etc/recap
else
  echo "No configuration file found. Proceeding with defaults."
fi

# Quit if load is higher than MAXLOAD
# LOAD is the first field of /proc/loadavg.  The fractional comparison uses
# awk (already needed by the report functions) so the check does not depend
# on bc or on grep's PCRE support being available.
LOAD=""
read -r LOAD _ < /proc/loadavg
QUIT=0
if [[ -n "${LOAD}" && -n "${MAXLOAD}" ]]; then
  QUIT=$( awk -v cur="${LOAD}" -v max="${MAXLOAD}" \
              'BEGIN { if ((cur + 0) > (max + 0)) print 1; else print 0 }' )
fi
if [[ "${QUIT}" -eq 1 ]]; then
  echo "QUIT (LOAD:${LOAD}, MAXLOAD:${MAXLOAD})"
  exit 1
fi

# Evaluate input flags, print usage if more than one flag is used.
# Misuse exits with status 1; --help and --version exit with status 0.
if [[ "$#" -gt 1 ]]; then
  echo "Only one flag is allowed"
  print_usage
  exit 1
fi

# If we only have one flag, see if we know how to handle it.
if [[ "$#" -gt 0 ]]; then
  case "$1" in
    -V|--version)
      echo "${_VERSION}"
      exit 0
      ;;
    -B|--backup)
      # backup latest log files
      BACKUP="yes"
      ;;
    -h|--help)
      # print usage
      print_usage
      exit 0
      ;;
    -S|--snapshot)
      # take a snapshot outside of the regular output rotation
      SNAPSHOT="yes"
      ;;
    *)
      # user entered an invalid flag, print warning and exit
      echo "Invalid Input"
      print_usage
      exit 1
      ;;
  esac
fi

# Verify that script is being run as root
if [[ "$(id -u)" != "0" ]]; then
  echo "This script must be run as root."
  # Non-zero so callers can tell that nothing was collected.
  exit 1
fi

check_disk_space

# Grab the server's host name
HOSTNAME="$( hostname )"

# Create output directory if it is not already present
create_output_dirs

# Take a backup; no reports are generated and no lock is taken in this mode
if [[ "${BACKUP,,}" == "yes" ]]; then
  create_backup
  exit 0
fi

## Proceed to report generation

# Create lock
recaplock

# Run the ps report
if [[ "${USEPS,,}" == "yes" ]]; then
  run_item_report "ps"
fi

# Run the resources report
#TODO: standardize the run_resources_report
if [[ "${USERESOURCES,,}" == "yes" ]]; then
  run_resources_report
fi

# Run the pstree report
if [[ "${USEPSTREE,,}" == "yes" ]]; then
  run_item_report "pstree"
fi

# Run the netstat report
#TODO: standardize the run_netstat_report
if [[ "${USENETSTAT,,}" == "yes" ]]; then
  run_netstat_report
fi

# Run the fdisk report
if [[ "${USEFDISK,,}" == "yes" ]]; then
  run_item_report "fdisk"
fi

# Run the mysql report, only when mysqladmin is installed
#TODO: standardize the run_mysql_report
if type -p 'mysqladmin' > /dev/null && [[ "${USEMYSQL,,}" == "yes" ]]; then
  run_mysql_report
fi

# Check to see if report should be emailed
#TODO: Validate MAILTO format.
if [[ -n "${MAILTO}" ]]; then
  send_mail
fi

# We're done, time to exit
exit 0
