#! /bin/bash
################################################################################
#
# Dunuins Vzdump Hook Script for Voyager
#
# This script for PVE 7.3 will enable storages when a backup starts and disable
# them after the backup has finished. This is useful in case your backup
# storages aren't online 24/7, as the webUI will get unresponsive or even
# totally unusable when there are unreachable PBS, NFS or SMB storages.
#
# To install it, create a new file at the location of your choice and edit it.
# For example:
# $ nano /var/lib/vz/snippets/vzdump_hook.sh
# Paste all of this code there and save it with CTRL+X, Y.
# Make that script owned by root:
# $ chown root:root /var/lib/vz/snippets/vzdump_hook.sh
# Make the script executable by root:
# $ chmod 750 /var/lib/vz/snippets/vzdump_hook.sh
# Add hook script to /etc/vzdump.conf:
# $ echo 'script: /var/lib/vz/snippets/vzdump_hook.sh' >> /etc/vzdump.conf
#
# Last edit: 2023.05.21 03:12
###############################################################################
# User-adjustable settings start here.
#
# Allow-list: storageIDs this script is permitted to enable/disable.
# An empty array means every storage is permitted. To restrict the script to
# specific storages, append one line per storageID right below the
# declaration, for example:
# incl_storids+=("MyStorageID")
declare -a incl_storids=()
#incl_storids+=("YourStorageID")
# Deny-list: storageIDs this script must never enable/disable.
# An empty array excludes nothing. The deny-list is evaluated after the
# allow-list, so a storageID listed in both is not touched. To exclude a
# storage, append one line per storageID right below the declaration,
# for example:
# excl_storids+=("MyStorageID")
declare -a excl_storids=()
#excl_storids+=("YourStorageID")
# VMIDs of the VMs that can't be run at the same time (one group of VMs
# sharing a device); append them like the commented example below
declare -a vmids_sharing_device1=()
#vmids_sharing_device1+=(YourVMID)
# directory in which the script keeps its state file (the VMID to resume)
conf_dir="/tmp"
# "true": a VM that was shut down because it belongs to the group sharing a
# device is started again when the backup job ends; any other value: it is
# left off
resume_vmids_sharing_device="false"
# time budget in seconds for one attempt: how long the script keeps polling
# for the wanted state before the command is issued again
retry_timeout=60
# seconds to wait for a VM to shut down
shutdown_timeout=600
# number of attempts before giving up
retry_amount=3
# seconds to sleep between two status polls
check_interval=10
# Minimum uptime in seconds the server must have reached before a backup job
# is allowed to run. Might be useful in case you virtualize a PBS or NAS that
# has to be started first after boot for the backup storage to become
# available.
boot_delay=300
# End of Config
# Hook entry point.
# Arguments:
#   $1 - phase name passed by vzdump
#   $3 - VMID (only read in the per-guest phases)
# Environment:
#   STOREID - ID of the target storage; empty/unset when --dumpdir is used
#             directly
# Exit status: 0 on success, 1 on an unknown phase or when a storage could not
# be enabled/disabled.
phase=$1

# Append one timestamped line to the hook log.
# Arguments: $* - message text
log_msg() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $*" >> /tmp/hook.log
}

# Print the number of seconds that have passed since the system booted.
uptime_seconds() {
    echo $(( $(date +%s) - $(date +%s --date="$(uptime -s)") ))
}

# Succeed (return 0) if this script is allowed to enable/disable the storage.
# The deny-list always wins; an empty allow-list permits every storage.
# Arguments: $1 - storageID
storage_allowed() {
    local id=$1 stid
    for stid in "${excl_storids[@]}"; do
        if [ "${stid}" = "${id}" ]; then
            return 1
        fi
    done
    if [ "${#incl_storids[@]}" -eq 0 ]; then
        return 0
    fi
    for stid in "${incl_storids[@]}"; do
        if [ "${stid}" = "${id}" ]; then
            return 0
        fi
    done
    return 1
}

# Print the third column of the 'pvesm status' line of the given storage
# (compared against "active"/"disabled" by the callers).
# The storageID is compared as an exact string, not as a regular expression.
# Arguments: $1 - storageID
storage_status() {
    /usr/sbin/pvesm status --storage "$1" | awk -v id="$1" '$1 == id { print $3 }'
}

# Print the value of the "status:" line of 'qm status' (e.g. "running").
# Arguments: $1 - VMID
vm_status() {
    qm status "$1" | awk '$1 == "status:" { print $2 }'
}

# Set the disable flag of a storage and poll until it reports the wanted
# status. Each attempt polls every check_interval seconds for up to
# retry_timeout seconds; the command is re-issued up to retry_amount times.
# Arguments: $1 - storageID
#            $2 - value for 'pvesm set --disable' (0 or 1)
#            $3 - status to wait for
# Returns:   0 once the wanted status is seen, 1 when all attempts ran out
toggle_storage() {
    local id=$1 disable_flag=$2 wanted=$3
    local attempt waited
    # the counters must be real integers; never step by less than 1 so the
    # polling loop is guaranteed to terminate
    local step=$(( check_interval > 0 ? check_interval : 1 ))
    for (( attempt = 0; attempt < retry_amount; attempt++ )); do
        /usr/sbin/pvesm set "${id}" --disable "${disable_flag}"
        for (( waited = 0; waited < retry_timeout; waited += step )); do
            sleep "${check_interval}"
            if [ "$(storage_status "${id}")" = "${wanted}" ]; then
                return 0
            fi
        done
    done
    return 1
}

# Start a VM and poll until it reports "running", using the same
# retry_amount / retry_timeout / check_interval scheme as toggle_storage.
# Arguments: $1 - VMID
# Returns:   0 once the VM is running, 1 when all attempts ran out
start_vm_and_wait() {
    local id=$1
    local attempt waited
    local step=$(( check_interval > 0 ? check_interval : 1 ))
    for (( attempt = 0; attempt < retry_amount; attempt++ )); do
        qm start "${id}"
        for (( waited = 0; waited < retry_timeout; waited += step )); do
            sleep "${check_interval}"
            if [ "$(vm_status "${id}")" = "running" ]; then
                return 0
            fi
        done
    done
    return 1
}

case "${phase}" in
    job-init \
        | job-start \
        | job-end \
        | job-abort)
        # undef when --dumpdir is used directly
        storeid=${STOREID:-}
        # NOTE(review): 'job-start' and 'job-abort' are accepted but nothing
        # is done for them, so after an aborted job the storage stays enabled
        # and a stopped VM is not resumed - confirm this is intended.
        case "${phase}" in
            job-init)
                # pause backup job until uptime has reached boot_delay
                while [ "$(uptime_seconds)" -lt "${boot_delay}" ]; do
                    sleep 10
                done
                # remove a stale file that stores the last running VM of a group
                rm -f -- "${conf_dir}/vzdump_resume_vmid1"
                # enable the storage if it is allowed and currently disabled
                if [ -n "${storeid}" ] \
                    && storage_allowed "${storeid}" \
                    && [ "$(storage_status "${storeid}")" = "disabled" ]; then
                    if toggle_storage "${storeid}" 0 "active"; then
                        log_msg "Storage '${storeid}' successfully enabled "
                    else
                        # fail because storage couldn't be successfully
                        # enabled; put it back into the disabled state first
                        /usr/sbin/pvesm set "${storeid}" --disable 1
                        log_msg "Error: failed to enable storage '${storeid}'"
                        exit 1
                    fi
                fi
                ;;
            job-end)
                # A failed disable is remembered instead of exiting right
                # away, so that the VM resume below still gets its chance.
                job_end_failed=0
                # disable the storage if it is allowed and not yet disabled
                if [ -n "${storeid}" ] \
                    && storage_allowed "${storeid}" \
                    && [ "$(storage_status "${storeid}")" != "disabled" ]; then
                    if toggle_storage "${storeid}" 1 "disabled"; then
                        log_msg "Storage '${storeid}' successfully disabled "
                    else
                        log_msg "Error: failed to disable storage '${storeid}'"
                        job_end_failed=1
                    fi
                fi
                # start the VM again that got shut down for the backup
                resume_file="${conf_dir}/vzdump_resume_vmid1"
                if [ "${resume_vmids_sharing_device}" = "true" ] \
                    && [ -f "${resume_file}" ]; then
                    start_vmid=$(<"${resume_file}")
                    # the file lives in a possibly shared directory, so only
                    # accept a plain number before handing it to qm
                    if [[ "${start_vmid}" =~ ^[0-9]+$ ]]; then
                        if start_vm_and_wait "${start_vmid}"; then
                            log_msg "VM with VMID ${start_vmid} successfully started again"
                        else
                            log_msg "Error: failed to start VM with VMID ${start_vmid} again"
                        fi
                    else
                        log_msg "Error: ignoring invalid VMID in '${resume_file}'"
                    fi
                    # remove file that stores the last running VM of a group
                    rm -f -- "${resume_file}"
                fi
                if [ "${job_end_failed}" -ne 0 ]; then
                    exit 1
                fi
                ;;
        esac
        ;;
    backup-start \
        | backup-end \
        | backup-abort \
        | log-end \
        | pre-stop \
        | pre-restart \
        | post-restart)
        vmid=$3

        # shut down VMs sharing the device before doing a backup
        if [ "${phase}" = "backup-start" ]; then
            # only act when the VM being backed up belongs to the group
            vmid_in_group=0
            for group_vmid in "${vmids_sharing_device1[@]}"; do
                if [ "${group_vmid}" = "${vmid}" ]; then
                    vmid_in_group=1
                    break
                fi
            done
            if [ "${vmid_in_group}" -eq 1 ]; then
                # Query and handle each VM on its own: collecting the states
                # into a second array first would shift the indices whenever
                # 'qm status' prints nothing for one of the VMIDs.
                for group_vmid in "${vmids_sharing_device1[@]}"; do
                    if [ "$(vm_status "${group_vmid}")" = "running" ]; then
                        log_msg "Shutting down VM with VMID '${group_vmid}'"
                        qm shutdown "${group_vmid}" && qm wait "${group_vmid}" -timeout "${shutdown_timeout}"
                        if [ "${resume_vmids_sharing_device}" = "true" ]; then
                            # remember which VM to start again at job-end
                            echo "${group_vmid}" > "${conf_dir}/vzdump_resume_vmid1"
                        fi
                    fi
                done
            fi
        fi

        ;;
    *)
        log_msg "Error: Phase '${phase}' unknown"
        exit 1
        ;;
esac
exit 0