The NFS service provided by the two-node cluster is an "active/passive" one. In this case the normal behaviour is:
Therefore we decide to monitor the situation by:
nfsclusterserver
service is running on the cluster.
On all the monitored nodes:
# yum -y install nrpe nagios-plugins-perl perl-Nagios-Plugin
Obtain the latest version of the monitoring scripts from here and here and copy them to the relevant directory:
# cp check_nfs4.0.2.pl /usr/lib64/nagios/plugins/check_nfs4
# cp check_crm_v0_7 /usr/lib64/nagios/plugins/check_crm
# chmod +rx /usr/lib64/nagios/plugins/check_nfs4
# chmod +rx /usr/lib64/nagios/plugins/check_crm
Since all nodes on the cluster share the same domain and users we do not use the idmapd daemon. Its absence is therefore not critical:
sed -i 's/^if (!$idmapd_d) { $daelist/# if (!$idmapd_d) { $daelist/' /usr/lib64/nagios/plugins/check_nfs4
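To implement the Nagios check as designed we use a helper script that checks whether the NFS daemon is running on the tested host or not.
In the former case the check is handed over to the check_nfs4 script.
The NRPE and sudoers configuration shown further down invoke this helper as /usr/lib64/nagios/plugins/check_my_nfs, so save it under that path and make it executable: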
#!/bin/bash
#
# Nagios/NRPE helper for an active/passive NFS cluster.
#
# Decision sequence:
#   1. The cluster as a whole must be healthy according to `crm_mon -1 -s`.
#   2. The nfsclusterserver resource must be reported as started on some node.
#   3. If it is started on THIS node, hand over to the check_nfs4 plugin, whose
#      output and exit status become the result of this check.
#      Otherwise report OK: the service is running on the other node.
#
# Output: a single status line on stdout (Nagios reads plugin output there).
# Exit status: 0 = OK, 2 = CRITICAL, or whatever check_nfs4 returns.

readonly CRM_MON="/usr/sbin/crm_mon"
readonly NFS_RESOURCE="nfsclusterserver"
readonly CHECK_NFS4="/usr/lib64/nagios/plugins/check_nfs4"

# Step 1: overall cluster health. Any non-zero status from the one-shot
# summary mode is treated as a failure; its own text is discarded.
if ! "${CRM_MON}" -1 -s >/dev/null; then
  echo "Cluster is not OK!"
  exit 2
fi

# Take ONE status snapshot and base both remaining decisions on it, so they
# cannot disagree because the cluster changed between two crm_mon runs.
# If crm_mon fails here the snapshot is empty and step 2 reports CRITICAL.
cluster_status=$("${CRM_MON}" -1)

# Step 2: keep only the lines that show the NFS resource as started.
started_lines=$(grep -F -- "${NFS_RESOURCE}" <<<"${cluster_status}" | grep -i 'started')
if [[ -z "${started_lines}" ]]; then
  echo "NFS server is not running anywhere!"
  exit 2
fi

# Step 3: is the resource started on this node?
# The host name is searched only within the "started" lines, so a line that
# merely mentions this node together with the resource (for example a failed
# action) no longer counts. -F/-w match the name literally and as a whole
# word, so "node1" does not match "node10".
local_host=$(hostname -s)
if [[ -n "${local_host}" ]] \
  && grep -q -w -F -- "${local_host}" <<<"${started_lines}"; then
  # I am the NFS server: let check_nfs4 decide whether I am healthy.
  exec "${CHECK_NFS4}"
fi

# I am not the NFS server, but the cluster is OK and the service is running.
echo "NFS is running somewhere..."
exit 0
On all the hosts composing the cluster create the file /etc/nrpe.d/check_nfs4.cfg
containing the following directives:
# Hosts allowed to query this NRPE daemon: localhost plus the cld-nagios server.
allowed_hosts=127.0.0.1,192.168.60.32

# check_crm: run the check_crm plugin directly.
command[check_crm]=/usr/lib64/nagios/plugins/check_crm

# check_nfs4: run the check_my_nfs helper through 'sudo', because on CentOS
# the file '/var/log/messages' is readable only by root.
command[check_nfs4]=sudo /usr/lib64/nagios/plugins/check_my_nfs
/etc/sudoers.d/nrpe
containing:
Defaults:nrpe !requiretty
nrpe ALL = (root) NOPASSWD: /usr/sbin/crm_mon
nrpe ALL = (root) NOPASSWD: /usr/lib64/nagios/plugins/check_my_nfs
nrpe ALL = (root) NOPASSWD: /usr/lib64/nagios/plugins/check_nfs4 -v
# Make the sudoers drop-in read-only for owner and group, no access for others.
chmod 440 /etc/sudoers.d/nrpe

# Open 5666/tcp for incoming NRPE requests. These are two separate
# invocations: the first changes the running firewall, the second stores the
# rule in the permanent configuration.
firewall-cmd --add-port=5666/tcp
firewall-cmd --permanent --add-port=5666/tcp

# Start the NRPE daemon now and enable it at boot.
systemctl start nrpe
systemctl enable nrpe
# rpm -qa | grep nrpe
nrpe-2.15-2.el6.x86_64
nagios-plugins-nrpe-2.15-2.el6.x86_64
commands.cfg
file)
define command{
        command_name    check_nrpe_cedc
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -t 480 -c $ARG1$
        }
# Command "check_nfs4": asks the NRPE agent on the target host to run its
# check_nfs4 command.
#
# The contact_groups directive was removed from this object: a Nagios command
# definition accepts only command_name and command_line. Notification
# contacts are set on the service definition instead.
#
# NOTE(review): command_line calls $USER1$/check_nrpe_cedc, but
# check_nrpe_cedc appears in this configuration only as a command name, not
# as a plugin file. Confirm that such an executable exists, or call
# $USER1$/check_nrpe instead.
define command{
        command_name    check_nfs4
        command_line    $USER1$/check_nrpe_cedc -H $HOSTADDRESS$ -c check_nfs4
        }
# Service "NFSv4 Status" on host cld-blu-01: runs the check_nfs4 NRPE command
# through check_nrpe_cedc and notifies the cedc-admins contact group.
define service{
        use                     server-service          ; service template to inherit from
        host_name               cld-blu-01
        service_description     NFSv4 Status
        check_command           check_nrpe_cedc!check_nfs4
        contact_groups          cedc-admins
        }
# Command "check_crm": asks the NRPE agent on the target host to run its
# check_crm command.
#
# The contact_groups directive was removed from this object: a Nagios command
# definition accepts only command_name and command_line. Notification
# contacts are set on the service definition instead.
#
# NOTE(review): command_line calls $USER1$/check_nrpe_cedc, but
# check_nrpe_cedc appears in this configuration only as a command name, not
# as a plugin file. Confirm that such an executable exists, or call
# $USER1$/check_nrpe instead.
define command{
        command_name    check_crm
        command_line    $USER1$/check_nrpe_cedc -H $HOSTADDRESS$ -c check_crm
        }
# Service "CFS Cluster Status" on host cld-blu-01: runs the check_crm NRPE
# command through check_nrpe_cedc and notifies the cedc-admins contact group.
define service{
        use                     server-service          ; service template to inherit from
        host_name               cld-blu-01
        service_description     CFS Cluster Status
        check_command           check_nrpe_cedc!check_crm
        contact_groups          cedc-admins
        }
/etc/init.d/nagios reload