Monitoring Proxmox Backup Server

adoII

Renowned Member
Jan 28, 2010
182
19
83
Hi,
i am thinking about implementing some nagios checks for the proxmox backup server.
I would like to check that the last datastore verify was successfull and not older x days
Also I would like to check that the last Garbage Collection and Pruge was successfull
Also maybe that the last file backup of server x is not older than y
Also the last vm backup is not older than xx

What is the best way to get access to these values without scraping the GUI ?
 
What is the best way to get access to these values without scraping the GUI ?
you can use the api (which is rather underdocumented at the moment, you can for example check what the web gui does for an api call)
or the proxmox-backup-client/proxmox-backup-manager to get those infos
 
Hi,

I have made an attempt for a Nagios check plugin that reports if there are Proxmox Backup Server tasks with status warning or error for the current day.

Python:
#!/usr/bin/python3

# Import libraries
import sys, getopt, requests, urllib3, json
urllib3.disable_warnings()
from datetime import datetime, time, timezone

# Configuration variables
fqdn_pbs = ''
api_token_pbs = ''
debug = False

# Verify and handle command line arguments
options, remainder = getopt.gnu_getopt(sys.argv[1:], 'hdf:t:')

for opt, arg in options:
   if opt == '-h':
      print ('check_pbs.py -d (debug) -f <PBS server fqdn> -t <API token: <username>@pbs!<api id>:<api secret>>')
      sys.exit()
   if opt == '-d':
      debug = True
   if opt == '-f':
      fqdn_pbs = arg
   elif opt == '-t':
      api_token_pbs = 'PBSAPIToken=' + arg


if fqdn_pbs == '' or api_token_pbs == '':
  print ('check_pbs.py -d (debug) -f <PBS server fqdn> -t <API token: <username>@pbs!<api id>:<api secret>>')
  sys.exit(1)

# Set some global variables
taskoutput=[]
processfailure=0

# Function for PBS api query
def get_pbs_tasks(fqdn, status, apitoken):
    totaltasks=0
    midnight = datetime.combine(datetime.today(), time.min)
    resp=requests.get("https://" + fqdn + ":8007/api2/json/nodes/localhost/tasks?limit=0&statusfilter=" + status + "&since=" + midnight.strftime('%s'),verify=False,timeout=5,headers={'Authorization': apitoken})
    if resp.status_code == 401:
      taskoutput.append("Authentication failed for https://" + fqdn + ":8007")
      taskoutput.append('')
      return [False, 0]
    if resp.status_code == 200:
      jsondata = resp.json() # Check the JSON Response Content documentation below
      if debug:
        print(json.dumps(jsondata, indent=2))  # Print complete json data for debugging purposes
        print('')
      for i in jsondata['data']:
        taskoutput.append('worker_id: '+i['worker_id'])
        taskoutput.append('worker_type: '+i['worker_type'])
        taskoutput.append('user: '+i['user'])
        taskoutput.append('type: error')
        taskoutput.append('status: '+i['status'])
        taskoutput.append('starttime: '+str(datetime.fromtimestamp(i['starttime'])))
        taskoutput.append('endtime: '+str(datetime.fromtimestamp(i['endtime'])))
        taskoutput.append('')
        totaltasks+=1
      return [True, totaltasks]
    if resp.status_code != 200:
      taskoutput.append("Failed to access or process https://" + fqdn + ":8007")
      taskoutput.append('')
      return [False, 0]



### Process pbs server tasks

taskoutput.append('')
taskoutput.append('Today\'s unsuccessfull tasks at ' + fqdn_pbs + ':')
taskoutput.append('')

pbs_errors=0
pbs_warnings=0

result = get_pbs_tasks(fqdn_pbs, "error", api_token_pbs)
if result[0] == True:
  pbs_errors+=result[1]
else:
  processfailure+=1

result = get_pbs_tasks(fqdn_pbs, "warning", api_token_pbs)
if result[0] == True:
  pbs_warnings+=result[1]
else:
  processfailure+=1

taskoutput.append('errors: '+str(pbs_errors)+' warnings: '+str(pbs_warnings))
taskoutput.append('')



###Print and return Nagios status and performance data

# Unknown in case of process failures
if processfailure>0:
  print('SERVICE STATUS: UNKNOWN | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(3)

# OK If no errors and warning
if pbs_errors==0 and pbs_warnings==0:
  print('SERVICE STATUS: OK | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(0)

# Warning if no errors and one or more warnings
if pbs_errors==0 and pbs_warnings>0:
  print('SERVICE STATUS: WARNING | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(1)

# Error in case of one or more errors
if pbs_errors>0:
  print('SERVICE STATUS: ERROR | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(2)

Feel free to use it.
 
Last edited:
Hi,

I have made an attempt for a Nagios check plugin that reports if there are Proxmox Backup Server tasks with status warning or error for the current day.

Python:
#!/usr/bin/python3

# Import libraries
import sys, getopt, requests, urllib3, json
urllib3.disable_warnings()
from datetime import datetime, time, timezone

# Configuration variables
fqdn_pbs = ''
api_token_pbs = ''
debug = False

# Verify and handle command line arguments
options, remainder = getopt.gnu_getopt(sys.argv[1:], 'hdf:t:')

for opt, arg in options:
   if opt == '-h':
      print ('check_pbs.py -d (debug) -f <PBS server fqdn> -t <API token: <username>@pbs!<api id>:<api secret>>')
      sys.exit()
   if opt == '-d':
      debug = True
   if opt == '-f':
      fqdn_pbs = arg
   elif opt == '-t':
      api_token_pbs = 'PBSAPIToken=' + arg


if fqdn_pbs == '' or api_token_pbs == '':
  print ('check_pbs.py -d (debug) -f <PBS server fqdn> -t <API token: <username>@pbs!<api id>:<api secret>>')
  sys.exit(1)

# Set some global variables
taskoutput=[]
processfailure=0

# Function for PBS api query
def get_pbs_tasks(fqdn, status, apitoken):
    totaltasks=0
    midnight = datetime.combine(datetime.today(), time.min)
    resp=requests.get("https://" + fqdn + ":8007/api2/json/nodes/localhost/tasks?limit=0&statusfilter=" + status + "&since=" + midnight.strftime('%s'),verify=False,timeout=5,headers={'Authorization': apitoken})
    if resp.status_code == 401:
      taskoutput.append("Authentication failed for https://" + fqdn + ":8007")
      taskoutput.append('')
      return [False, 0]
    if resp.status_code == 200:
      jsondata = resp.json() # Check the JSON Response Content documentation below
      if debug:
        print(json.dumps(jsondata, indent=2))  # Print complete json data for debugging purposes
        print('')
      for i in jsondata['data']:
        taskoutput.append('worker_id: '+i['worker_id'])
        taskoutput.append('worker_type: '+i['worker_type'])
        taskoutput.append('user: '+i['user'])
        taskoutput.append('type: error')
        taskoutput.append('status: '+i['status'])
        taskoutput.append('starttime: '+str(datetime.fromtimestamp(i['starttime'])))
        taskoutput.append('endtime: '+str(datetime.fromtimestamp(i['endtime'])))
        taskoutput.append('')
        totaltasks+=1
      return [True, totaltasks]
    if resp.status_code != 200:
      taskoutput.append("Failed to access or process https://" + fqdn + ":8007")
      taskoutput.append('')
      return [False, 0]



### Process pbs server tasks

taskoutput.append('')
taskoutput.append('Today\'s unsuccessfull tasks at ' + fqdn_pbs + ':')
taskoutput.append('')

pbs_errors=0
pbs_warnings=0

result = get_pbs_tasks(fqdn_pbs, "error", api_token_pbs)
if result[0] == True:
  pbs_errors+=result[1]
else:
  processfailure+=1

result = get_pbs_tasks(fqdn_pbs, "warning", api_token_pbs)
if result[0] == True:
  pbs_warnings+=result[1]
else:
  processfailure+=1

taskoutput.append('errors: '+str(pbs_errors)+' warnings: '+str(pbs_warnings))
taskoutput.append('')



###Print and return Nagios status and performance data

# Unknown in case of process failures
if processfailure>0:
  print('SERVICE STATUS: UNKNOWN | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(3)

# OK If no errors and warning
if pbs_errors==0 and pbs_warnings==0:
  print('SERVICE STATUS: OK | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(0)

# Warning if no errors and one or more warnings
if pbs_errors==0 and pbs_warnings>0:
  print('SERVICE STATUS: WARNING | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(1)

# Error in case of one or more errors
if pbs_errors>0:
  print('SERVICE STATUS: ERROR | errors='+str(pbs_errors)+' warnings='+str(pbs_warnings))
  for outputline in taskoutput:
    print(outputline)
  sys.exit(2)

Feel free to use it.

Hi.

Good job!!. I’m testing it on a Proxmox Backup Server v2.1-2 and it works fine:

Code:
SERVICE STATUS: OK | errors=0 warnings=0

Today's unsuccessfull tasks at localhost:

errors: 0 warnings: 0

But in a lower version (1.1-5) I get an error:

Code:
Traceback (most recent call last):
  File "check_pbs.py", line 83, in <module>
    result = get_pbs_tasks(fqdn_pbs, "warning", api_token_pbs)
  File "check_pbs.py", line 51, in get_pbs_tasks
    taskoutput.append('worker_id: '+i['worker_id'])
TypeError: can only concatenate str (not "NoneType") to str

Any suggestion? Can I get it to work or is it necessary to update PBS?

Thanks in advance.