#!/usr/bin/env bash
set -euo pipefail
# ====== CONFIG ======
NODE="$(hostname)" # Local PVE node
PBS_STORE="pbs" # Name of the PBS storage as configured in PVE (e.g. "pbs" or "ProxmoxBS")
TARGET_STORE="local-zfs" # Target storage for the restore
VMIDS=(101 102 201 202) # IDs of the cold-standby guests (VMs and LXC containers may be mixed)
DRYRUN="${DRYRUN:-0}" # DRYRUN=1 -> only print the commands, execute nothing
KEEP_CONFIG_BACKUP=1 # Save a qm/pct config dump before destroying a guest
SHUTDOWN_TIMEOUT=120 # Seconds allowed for a graceful shutdown
FORCE_STOP=1 # Hard-stop after the timeout (1=yes/0=no)
LOCK_WAIT=300 # Seconds to wait for a lock to clear before giving up
# Directory that receives the config dumps written when KEEP_CONFIG_BACKUP=1
CONFIG_BACKUP_DIR="/root/restore-config-backups"
RESTORE_OPTS_VM="" # Extra qmrestore options, e.g. "--unique 0" (default: take the config from the backup)
# Extra options appended to the "pct restore" command line
RESTORE_OPTS_CT=""
# =====================
#######################################
# Abort the script when a required external command is not available.
# Arguments: $1 - name of the command to look up with `command -v`
# Outputs:   "Fehlt: <name>" on stderr when the command is missing
# Returns:   0 when the command exists; otherwise exits the shell with status 1
#######################################
need() {
  command -v "$1" >/dev/null || {
    # Diagnostics belong on stderr so they never mix with captured stdout.
    echo "Fehlt: $1" >&2
    exit 1
  }
}
# Fail fast if one of the Proxmox CLI tools or jq is not installed.
need pvesh; need jq; need qm; need pct
#######################################
# Print a message to stdout, prefixed with the current local timestamp.
# Arguments: $@ - message words, joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout
#######################################
log() {
  printf '[%s] %s\n' "$(date +'%F %T')" "$*"
}
#######################################
# Execute a command line, or only print it when dry-run mode is active.
# Globals:   DRYRUN (read) - "1" selects dry-run mode
# Arguments: $@ - command line; evaluated with eval, so a single string may
#                 contain shell syntax such as redirections
# Outputs:   "[DRYRUN] <command>" on stdout in dry-run mode
# Returns:   status of the evaluated command (or of echo in dry-run mode)
#######################################
run() {
  case "$DRYRUN" in
    1)
      echo "[DRYRUN] $*"
      ;;
    *)
      # eval is intentional: callers hand over complete shell command strings.
      eval "$@"
      ;;
  esac
}
#######################################
# Create the config backup directory if it does not exist yet.
# Globals:   CONFIG_BACKUP_DIR (read)
# Returns:   0 when the directory already exists, otherwise the status of run
#######################################
ensure_dirs() {
  if [[ ! -d "$CONFIG_BACKUP_DIR" ]]; then
    # Routed through run so that dry-run mode only prints the mkdir.
    run "mkdir -p '$CONFIG_BACKUP_DIR'"
  fi
}
#######################################
# Check that the configured PBS storage is known on this node.
# Globals:   NODE (read), PBS_STORE (read)
# Returns:   0 when a storage whose id equals PBS_STORE is listed for the
#            node, non-zero otherwise (including pvesh/jq failures)
#######################################
storage_exists() {
  # pvesh prints a human-readable table by default; JSON must be requested
  # explicitly, otherwise jq cannot parse the output and the check always fails.
  pvesh get "/nodes/$NODE/storage" --output-format json \
    | jq -e --arg id "$PBS_STORE" '.[]|select(.storage==$id)' >/dev/null
}
#######################################
# Determine whether a guest ID belongs to a VM or a container.
# Arguments: $1 - guest ID
# Outputs:   "vm" when `qm config` succeeds, otherwise "ct" when
#            `pct config` succeeds, otherwise "none"
#######################################
get_type() {
  local id="$1"
  local found="none"

  if qm config "$id" >/dev/null 2>&1; then
    found="vm"
  elif pct config "$id" >/dev/null 2>&1; then
    found="ct"
  fi

  echo "$found"
}
#######################################
# Print the second whitespace-separated field of each `qm status` output line
# (e.g. "running" for a line "status: running").
# Arguments: $1 - VM ID
# Outputs:   the extracted field(s) on stdout; nothing when qm prints nothing
#######################################
status_vm() {
  local id="$1"

  # Errors from qm are hidden on purpose; callers treat empty output as
  # "not running".
  qm status "$id" 2>/dev/null \
    | awk '{print $2}'
}
#######################################
# Print the second whitespace-separated field of each `pct status` output line
# (e.g. "running" for a line "status: running").
# Arguments: $1 - container ID
# Outputs:   the extracted field(s) on stdout; nothing when pct prints nothing
#######################################
status_ct() {
  local id="$1"

  # Errors from pct are hidden on purpose; callers treat empty output as
  # "not running".
  pct status "$id" 2>/dev/null \
    | awk '{print $2}'
}
#######################################
# Poll a guest's config until it no longer contains a "lock:" line.
# Globals:   LOCK_WAIT (read) - maximum number of seconds to wait
# Arguments: $1 - guest kind; "vm" uses qm, anything else uses pct
#            $2 - guest ID
# Outputs:   a timeout message via log when the lock does not clear
# Returns:   0 once no lock line is found, 1 on timeout
#######################################
wait_no_lock() {
  local kind="$1" vmid="$2"
  local tool="pct" elapsed=0

  if [[ "$kind" == "vm" ]]; then
    tool="qm"
  fi

  # Keep polling for as long as the config still shows a lock entry.
  while "$tool" config "$vmid" 2>/dev/null | grep -q '^lock:'; do
    if [[ $elapsed -ge $LOCK_WAIT ]]; then
      log "Lock auf $kind/$vmid Timeout nach ${LOCK_WAIT}s"
      return 1
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done

  return 0
}
#######################################
# Remove a guest's lock when its config currently shows a "lock:" line.
# Arguments: $1 - guest kind; "vm" uses qm, anything else uses pct
#            $2 - guest ID
# Outputs:   a log line announcing the unlock attempt
# Returns:   status of the unlock command, or 0 when no lock line is present
#######################################
unlock_if_possible() {
  local kind="$1" vmid="$2"
  local tool="pct"

  if [[ "$kind" == "vm" ]]; then
    tool="qm"
  fi

  if "$tool" config "$vmid" 2>/dev/null | grep -q '^lock:'; then
    log "Versuche $tool unlock $vmid"
    run "$tool unlock $vmid"
  fi
}
#######################################
# Shut a running guest down gracefully, falling back to a hard stop.
#
# The status of the graceful shutdown command is captured instead of being
# left to `set -e`: a shutdown that fails or times out must not abort the
# script before the FORCE_STOP fallback has had a chance to run.
#
# Globals:   SHUTDOWN_TIMEOUT (read), FORCE_STOP (read)
# Arguments: $1 - guest kind; "vm" uses qm, anything else uses pct
#            $2 - guest ID
# Outputs:   progress messages via log
# Returns:   0 when the guest was not running or the shutdown went through;
#            the status of the hard stop when one was issued; the status of
#            the failed graceful shutdown when the guest is still running
#            and FORCE_STOP is not "1"
#######################################
shutdown_safe() {
  local kind="$1" vmid="$2"
  local tool label state cmd rc=0

  if [[ "$kind" == "vm" ]]; then
    tool="qm"
    label="VM"
    state="$(status_vm "$vmid" || true)"
  else
    tool="pct"
    label="CT"
    state="$(status_ct "$vmid" || true)"
  fi

  # Nothing to do unless the guest is currently running.
  if [[ "$state" != "running" ]]; then
    return 0
  fi

  log "$label $vmid: graceful shutdown (${SHUTDOWN_TIMEOUT}s)…"
  cmd="$tool shutdown $vmid --timeout $SHUTDOWN_TIMEOUT"
  if [[ "$kind" != "vm" && "$FORCE_STOP" = "1" ]]; then
    # forceStop: let pct hard-stop the container itself after the timeout.
    cmd+=" --forceStop 1"
  fi
  run "$cmd" || rc=$?

  if [[ "$kind" == "vm" ]]; then
    state="$(status_vm "$vmid" || true)"
  else
    state="$(status_ct "$vmid" || true)"
  fi

  if [[ "$state" == "running" ]]; then
    if [[ "$FORCE_STOP" = "1" ]]; then
      log "$label $vmid: Timeout – hartes Stop."
      run "$tool stop $vmid"
    elif (( rc != 0 )); then
      # Still running and no hard stop allowed: report the failure.
      return "$rc"
    fi
  fi
}
#######################################
# Dump a guest's current config into a timestamped file before it is removed.
# Globals:   KEEP_CONFIG_BACKUP (read) - anything other than "1" disables this
#            CONFIG_BACKUP_DIR (read)  - directory receiving the dump
# Arguments: $1 - guest kind; "vm" uses qm, anything else uses pct
#            $2 - guest ID
# Returns:   0 when backups are disabled, otherwise the status of run
#######################################
backup_config() {
  local kind="$1" vmid="$2"

  if [[ "$KEEP_CONFIG_BACKUP" != "1" ]]; then
    return 0
  fi

  ensure_dirs

  local tool="pct" stamp
  if [[ "$kind" == "vm" ]]; then
    tool="qm"
  fi
  stamp="$(date +'%F_%H%M%S')"

  # The redirection is part of the string so that run can eval (or print) it.
  run "${tool} config $vmid > '$CONFIG_BACKUP_DIR/${tool}-$vmid-$stamp.conf'"
}
#######################################
# Destroy a guest with the purge option enabled.
# Arguments: $1 - guest kind; "vm" uses qm, anything else uses pct
#            $2 - guest ID
# Outputs:   a log line announcing the destroy
# Returns:   status of run
#######################################
destroy_safe() {
  local kind="$1" vmid="$2"
  local tool="pct"

  if [[ "$kind" == "vm" ]]; then
    tool="qm"
  fi

  log "$kind $vmid: destroy (purge)…"
  run "$tool destroy $vmid --purge 1"
}
#######################################
# Print the volume ID of the newest PBS backup for a guest.
# Globals:   NODE (read), PBS_STORE (read)
# Arguments: $1 - guest ID
#            $2 - guest kind; "ct" selects container backups, anything else
#                 selects VM backups
# Outputs:   the volid of the matching backup with the highest ctime on
#            stdout; nothing when no backup matches
# Returns:   status of the pvesh | jq pipeline
#######################################
get_latest_volid() {
  local vmid="$1" kind="$2"
  local pathtype="vm"

  if [[ "$kind" == "ct" ]]; then
    pathtype="ct"
  fi

  # pvesh prints a table by default, so JSON has to be requested explicitly
  # for jq. The guest ID is compared as a string so the filter works whether
  # the API reports vmid as a number or as a string.
  pvesh get "/nodes/$NODE/storage/$PBS_STORE/content" --content backup --output-format json \
    | jq -r --arg id "$vmid" --arg pt "$pathtype" '
        map(select((.vmid | tostring) == $id and (.volid | test("/\($pt)/"))))
        | sort_by(.ctime) | last | .volid // empty
      '
}
#######################################
# Restore a guest from a PBS backup volume onto the target storage.
# Globals:   TARGET_STORE (read), RESTORE_OPTS_VM (read), RESTORE_OPTS_CT (read)
# Arguments: $1 - guest ID
#            $2 - guest kind; "vm" uses qmrestore, anything else `pct restore`
#            $3 - backup volume ID; an empty value skips the restore
# Outputs:   progress messages via log
# Returns:   0 when skipped, otherwise the status of run
#######################################
restore_from_pbs() {
  local vmid="$1" kind="$2" volid="$3"
  local cmd

  if [[ -z "$volid" ]]; then
    log "$kind $vmid: Kein Backup im PBS gefunden – SKIP."
    return 0
  fi

  log "$kind $vmid: Restore von $volid -> $TARGET_STORE"

  case "$kind" in
    vm)
      cmd="qmrestore '$volid' $vmid --storage '$TARGET_STORE' --force 1 $RESTORE_OPTS_VM"
      ;;
    *)
      cmd="pct restore $vmid '$volid' --storage '$TARGET_STORE' --force 1 $RESTORE_OPTS_CT"
      ;;
  esac

  run "$cmd"
}
#######################################
# Re-create every configured cold-standby guest from its newest PBS backup.
#
# For a guest that does not exist locally, the newest VM backup is restored,
# or failing that the newest container backup. For a guest that exists, the
# backup is looked up BEFORE anything destructive happens: without a backup
# the guest is left untouched instead of being destroyed with nothing to
# restore from.
#
# Globals:   VMIDS (read), PBS_STORE (read), NODE (read)
# Outputs:   progress messages via log
# Returns:   exits with status 1 when the PBS storage is not available
#######################################
main() {
  storage_exists || { log "PBS-Storage '$PBS_STORE' nicht gefunden auf Node '$NODE'."; exit 1; }

  local vmid kind volid trykind
  for vmid in "${VMIDS[@]}"; do
    log "==== Bearbeite VMID $vmid ===="
    kind="$(get_type "$vmid")" # vm|ct|none

    if [[ "$kind" == "none" ]]; then
      # Guest does not exist: probe PBS for a matching backup, VM first, then CT.
      volid=""
      for trykind in vm ct; do
        volid="$(get_latest_volid "$vmid" "$trykind")"
        if [[ -n "$volid" ]]; then
          log "VMID $vmid existiert nicht – finde Backup-Typ: $trykind"
          restore_from_pbs "$vmid" "$trykind" "$volid"
          break
        fi
      done
      if [[ -z "$volid" ]]; then
        log "VMID $vmid: kein Backup gefunden – SKIP."
      fi
      continue
    fi

    # Guest exists: make sure there is something to restore before touching it.
    volid="$(get_latest_volid "$vmid" "$kind")"
    if [[ -z "$volid" ]]; then
      log "$kind $vmid: Kein Backup im PBS gefunden – Instanz bleibt unverändert, SKIP."
      continue
    fi

    # Clear locks, shut down cleanly, save the config, destroy, restore.
    unlock_if_possible "$kind" "$vmid" || true
    wait_no_lock "$kind" "$vmid" || { log "$kind $vmid: Lock konnte nicht aufgehoben werden – SKIP."; continue; }
    shutdown_safe "$kind" "$vmid"
    backup_config "$kind" "$vmid"
    destroy_safe "$kind" "$vmid"
    restore_from_pbs "$vmid" "$kind" "$volid"
  done

  log "Fertig."
}
# Script entry point: hand all command-line arguments to main.
main "$@"