What will probably take you the most time after you have Nagios installed and running is getting all of your devices into the cfg files that Nagios uses. What I did was write two Python scripts that take Excel spreadsheets (saved as CSV) and create the cfg files from them. I have some more things that I want to add to the scripts to handle more configuration options from the spreadsheet, but as they stand they get you up and running rather quickly.
First up is the windows.cfg file script.
The spreadsheet should have the following columns:
Name – short Name for the Server
Alias – long name of the server
IP – IP Address
HostGroup - what group do you want the server put in? You can put more than one Group here, just separate them with commas. The script will create all host groups used in this spreadsheet.
CheckNT – do you want Nagios to check for NSClient++? X for Yes
Uptime – do you want Nagios to retrieve server uptime? X for Yes
CPULoad – do you want Nagios to check the CPU Load? X for Yes
MemUsage – do you want Nagios to check the memory usage? X for Yes
Drives to monitor – Drive letters to monitor disk space on, separate multiple by ;.
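MonitorExplorer – do you want Nagios to monitor Explorer.exe? X for Yes. I personally don’t find this useful, but it’s one of the default checks that Nagios gives you. (Note: the script below uses the contents of this column as the process name(s) to check, so enter the process name, e.g. Explorer.exe, rather than an X.)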
Parent Device – this is whatever device this server is connected to. Setting it means that when a switch powers down you get one alert for the switch, rather than additional alerts for each of the 12 servers connected to it.
Contacts – this allows you to add special contact groups for specific servers in the file (e.g. notify Security that the Security Camera Server just went offline, or tell someone who only manages one server that their server is down). The contact group must already be created, but you can associate it with a device this way.
And now the script:
#=========================================================================
# AUTHOR: Robert Anderson
# DATE: 6/20/2011
# COMMENT: Creates Nagios windows.cfg file from csv
#
# Input columns, in order: Name, Alias, IP, HostGroup, CheckNT, Uptime,
# CPULoad, MemUsage, Drives, Processes, Parent Device, Contacts.
#
# Written to run unchanged on Python 2.7 and Python 3.
#=========================================================================
import csv

strDate = '2011.06.20'

# Hostgroups that are always defined, even when no row mentions them.
DEFAULT_GROUPS = ['servers']

# Number of spreadsheet columns the script reads from every row.
COLUMN_COUNT = 12

BANNER = '###############################################################################\n'


def _split_list(cell):
    """Return the non-empty, stripped items of a ';'- or ','-separated cell.

    Both separators are accepted so that sheets written for either
    convention produce the same result.
    """
    pieces = cell.replace(';', ',').split(',')
    return [piece.strip() for piece in pieces if piece.strip()]


def _is_checked(cell):
    """Return True when a yes/no cell is marked with an X (any case)."""
    return cell.strip().upper() == 'X'


def _file_header():
    """Return the comment banner written at the top of the config file."""
    return (
        BANNER
        + '# windows.CFG\n'
        + '#\n'
        + '# Last Modified: ' + strDate + '\n'
        + '#\n'
        + '# NOTES: This config file assumes that you are using the sample configuration\n'
        + '# files that get installed with the Nagios quickstart guide.\n'
        + '#\n'
        + BANNER
        + '\n'
    )


def _host_definition(name, alias, address, host_groups, contacts, parent):
    """Return the banner and 'define host' block for one server.

    host_groups and contacts are lists of names; parent is written as-is
    and omitted when empty.
    """
    # NOTE(review): the template name and the "switch" wording in the
    # emitted comments are kept exactly as the script has always written
    # them; confirm whether a Windows-specific host template was intended.
    group_list = ','.join(['windows-servers'] + host_groups)
    contact_list = ','.join(['admins', 'Server'] + contacts)
    lines = [
        BANNER,
        '# ' + name + ' --- ' + alias + '\n',
        BANNER,
        '\n',
        'define host{\n',
        ' use generic-switch ; Inherit default values from a template\n',
        ' host_name ' + name + " ; The name we're giving to this switch\n",
        ' alias ' + alias + ' ; A longer name associated with the switch\n',
        ' address ' + address + ' ; IP address of the switch\n',
        ' hostgroups ' + group_list + ' ; Host groups this switch is associated with\n',
        ' max_check_attempts 3\n',
        ' normal_check_interval 2 ; Check the service every 5 minutes normally\n',
        ' retry_check_interval 1 ; Re-check the service every minute\n',
        ' notification_interval 10\n',
        ' contact_groups ' + contact_list + '\n',
    ]
    if parent:
        lines.append(' parents ' + parent + '\n')
    lines.extend([' }\n', '\n', '\n\n'])
    return ''.join(lines)


def _service_definition(host_name, description, check_command):
    """Return one 'define service' block for the given host."""
    return (
        'define service{\n'
        + ' use generic-service\n'
        + ' host_name ' + host_name + '\n'
        + ' service_description ' + description + '\n'
        + ' check_command ' + check_command + '\n'
        + ' }\n'
    )


def _host_services(name, check_nt, uptime, cpu_load, mem_usage, drives, processes):
    """Return every service block requested for one server, in sheet order."""
    services = []
    if _is_checked(check_nt):
        services.append(('NSClient++ Version', 'check_nt!CLIENTVERSION'))
    if _is_checked(uptime):
        services.append(('Uptime', 'check_nt!UPTIME'))
    if _is_checked(cpu_load):
        services.append(('CPU Load', 'check_nt!CPULOAD!-l 5,80,90'))
    if _is_checked(mem_usage):
        services.append(('Memory Usage', 'check_nt!MEMUSE!-w 80 -c 90'))
    # One disk-space check per drive letter listed in the cell.
    for drive in _split_list(drives):
        services.append((
            drive + ':\\ Drive Space',
            'check_nt!USEDDISKSPACE!-l ' + drive + ' -w 80 -c 90',
        ))
    # One process-state check per process name listed in the cell.
    for process in _split_list(processes):
        services.append((process, 'check_nt!PROCSTATE!-d SHOWALL -l ' + process))
    return ''.join(
        _service_definition(name, description, command)
        for description, command in services
    )


def _hostgroup_definitions(groups):
    """Return the trailing section that defines every collected hostgroup."""
    parts = [
        BANNER + '\n',
        '# Define HostGroups\n\n',
        BANNER + '\n',
        '\n',
    ]
    for group in groups:
        print('Creating Group: ' + group + '...')
        parts.append('define hostgroup{\n')
        parts.append(' hostgroup_name ' + group + ' ; The name of the hostgroup\n')
        parts.append(' alias ' + group + ' ; Long name of the group\n')
        parts.append(' }\n')
        parts.append('\n')
    return ''.join(parts)


def main(csv_path='windows2.csv', cfg_path='windows.cfg.txt'):
    """Read the server spreadsheet and write the Nagios config file.

    csv_path -- CSV export of the spreadsheet, one server per row.
    cfg_path -- config file to create (overwritten if it exists).

    Every row is treated as a server; there is no header-row handling.
    NOTE(review): if the sheet is exported with a header row, remove it
    first or it will be emitted as a host.
    Completely blank rows are skipped and short rows are padded with empty
    cells, so a trailing blank line in the export no longer aborts the run.
    """
    groups = list(DEFAULT_GROUPS)
    # Open the input first so a missing CSV does not truncate an existing
    # config file; both handles are closed even if a row fails.
    with open(csv_path) as in_file:
        with open(cfg_path, 'w') as out_file:
            out_file.write(_file_header())
            for row in csv.reader(in_file):
                if not any(cell.strip() for cell in row):
                    continue
                cells = (list(row) + [''] * COLUMN_COUNT)[:COLUMN_COUNT]
                (name, alias, address, group_cell, check_nt, uptime,
                 cpu_load, mem_usage, drives, processes, parent,
                 contact_cell) = cells
                print('Creating Host: ' + name + '...')
                host_groups = _split_list(group_cell)
                out_file.write(_host_definition(
                    name, alias, address, host_groups,
                    _split_list(contact_cell), parent.strip()))
                out_file.write(_host_services(
                    name, check_nt, uptime, cpu_load, mem_usage,
                    drives, processes))
                out_file.write('\n\n\n')
                # Remember each group once, in first-seen order, so it can
                # be defined at the end of the file.
                for group in host_groups:
                    if group not in groups:
                        groups.append(group)
            out_file.write(_hostgroup_definitions(groups))


if __name__ == '__main__':
    main()
When you run the script you will get a file named windows.cfg.txt as the output. The one difference between this config and the ones that come with Nagios is that I group each host definition and all of its services together. I did this to make it easier to take out a server later if need be, or to find all the monitored services for a specific server.
