From e77ad65c550e73f316106475db776afc5b2f4f25 Mon Sep 17 00:00:00 2001
From: linggao
Date: Thu, 28 Feb 2008 20:21:35 +0000
Subject: [PATCH] SNMP monitoring support. more to come.

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@626 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
---
 .../lib/xcat/monitoring/snmpmon.pm            | 302 ++++++++++++++++++
 xCAT-server-2.0/sbin/xcat_traphandler         | 171 ++++++++++
 2 files changed, 473 insertions(+)
 create mode 100644 xCAT-server-2.0/lib/xcat/monitoring/snmpmon.pm
 create mode 100755 xCAT-server-2.0/sbin/xcat_traphandler

diff --git a/xCAT-server-2.0/lib/xcat/monitoring/snmpmon.pm b/xCAT-server-2.0/lib/xcat/monitoring/snmpmon.pm
new file mode 100644
index 000000000..f35ba4349
--- /dev/null
+++ b/xCAT-server-2.0/lib/xcat/monitoring/snmpmon.pm
@@ -0,0 +1,302 @@
+#!/usr/bin/env perl
+# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
+package xCAT_monitoring::snmpmon;
+BEGIN
+{
+  # Use the default install prefix when XCATROOT is unset or empty.
+  $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
+}
+use lib "$::XCATROOT/lib/perl";
+use IO::File;
+
+print "xCAT_monitoring::snmpmon loaded\n";
+1;
+
+
+
+#-------------------------------------------------------------------------------
+=head1  xCAT_monitoring::snmpmon
+=head2    Package Description
+  xCAT monitoring plug-in package that handles SNMP monitoring.
+
+=cut
+#-------------------------------------------------------------------------------
+
+#--------------------------------------------------------------------------------
+=head3    start
+      This function gets called by the monitorctrl module
+      when xcatd starts.
+    Arguments:
+      monservers --A hash reference keyed by the monitoring server nodes
+                 and each value is a ref to an array of [nodes, nodetype, status] arrays
+                 monitored by the server.
+                 So the format is:
+                 {monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
+    Returns:
+      (return code, message) -- (0, "started") on success; a non-zero code
+      and an error text when net-snmp is not installed or the SNMP
+      configuration could not be applied.
+=cut
+#--------------------------------------------------------------------------------
+sub start {
+  print "snmpmon::start called\n";
+
+  # The function may be called as a plain function or as a class method;
+  # in the latter case the first argument is the package name, skip it.
+  my $noderef = shift;
+  if ($noderef =~ /xCAT_monitoring::snmpmon/) {
+    $noderef = shift;
+  }
+
+  # NOTE: a check that bails out unless $^O is "linux" used to be sketched
+  # here; it is intentionally left disabled.
+
+  # check supported snmp package
+  my @snmpPkg = `/bin/rpm -qa | grep snmp`;
+  my $pkginstalled = grep(/net-snmp/, @snmpPkg);
+  if (!$pkginstalled) {
+    return (1, "net-snmp is not installed");
+  }
+
+  my ($ret, $err) = configSNMP();
+  if ($ret != 0) { return ($ret, $err); }
+
+  return (0, "started");
+}
+
+
+#--------------------------------------------------------------------------------
+=head3    configSNMP
+      This function puts xcat_traphandler into the
+      /usr/share/snmp/snmptrapd.conf file and restarts snmptrapd with the
+      new configuration. snmpd is started when it is not already running.
+      An existing snmptrapd.conf is saved as snmptrapd.conf.orig before it
+      is modified.
+    Arguments:
+      none.
+    Returns:
+      (return code, message) -- (0, "started") on success, (1, error text)
+      when the configuration file could not be read, backed up or written.
+=cut
+#--------------------------------------------------------------------------------
+sub configSNMP {
+  my $conf    = "/usr/share/snmp/snmptrapd.conf";
+  my $auth    = "authCommunity execute public\n";
+  my $handler = "traphandle default $::XCATROOT/sbin/xcat_traphandler\n";
+
+  if (-f $conf) {
+    my $in;
+    if (!open($in, '<', $conf)) {
+      return (1, "cannot read $conf: $!");
+    }
+    my @lines = readline($in);
+    close($in);
+
+    # If the file already references xcat_traphandler there is nothing to
+    # rewrite; only the daemons below are (re)started.
+    if (!grep(/xcat_traphandler/, @lines)) {
+      # back up the original file.
+      if (system("/bin/cp", "-f", $conf, "$conf.orig") != 0) {
+        return (1, "cannot back up $conf to $conf.orig");
+      }
+
+      my $out;
+      if (!open($out, '>', "$conf.tmp")) {
+        return (1, "cannot write $conf.tmp: $!");
+      }
+
+      # Make sure the appended directives start on a line of their own.
+      if (@lines && ($lines[-1] !~ /\n$/)) {
+        $lines[-1] .= "\n";
+      }
+
+      my $found = 0;
+      foreach my $line (@lines) {
+        # Only an active (not commented out) directive for the "public"
+        # community counts; add "execute" to its type list when missing.
+        if ($line =~ /^(\s*authCommunity\s+)(\S+)(\s+public\b.*)$/s) {
+          my ($pre, $types, $post) = ($1, $2, $3);
+          $found = 1;
+          if ($types !~ /\bexecute\b/) {
+            $line = "$pre$types,execute$post";
+          }
+        }
+        print $out $line;
+      }
+
+      if (!$found) {
+        print $out $auth;    #add new one if not found
+      }
+
+      # now add the new traphandle command:
+      print $out $handler;
+
+      if (!close($out)) {
+        return (1, "cannot write $conf.tmp: $!");
+      }
+      if (!rename("$conf.tmp", $conf)) {
+        return (1, "cannot replace $conf: $!");
+      }
+    }
+  }
+  else {     # The snmptrapd.conf file does not exist
+    # create the file:
+    my $out;
+    if (!open($out, '>', $conf)) {
+      return (1, "cannot create $conf: $!");
+    }
+    print $out $auth;
+    print $out $handler;
+    if (!close($out)) {
+      return (1, "cannot write $conf: $!");
+    }
+  }
+
+  # TODO: put the mib files to /usr/share/snmp/mibs
+
+  # Stop every running snmptrapd and start it again so that it reads the
+  # new snmptrapd.conf configuration file.
+  my @trapdPids = _runningPids("snmptrapd");
+  if (@trapdPids) {
+    kill('KILL', @trapdPids);
+  }
+  # start it up again!
+  system("/usr/sbin/snmptrapd -m ALL");
+
+  # If snmpd is already running leave it alone; otherwise start it.
+  my @snmpdPids = _runningPids("snmpd");
+  if (!@snmpdPids) {
+    system("/usr/sbin/snmpd");
+  }
+
+  return (0, "started");
+}
+
+
+#--------------------------------------------------------------------------------
+=head3    _runningPids
+      Private helper for configSNMP. Lists the process ids of the processes
+      whose "ps -ef" line contains the given name.
+    Arguments:
+      name -- the string to look for (only literal daemon names are passed).
+    Returns:
+      A list of numeric process ids; empty when nothing matches.
+=cut
+#--------------------------------------------------------------------------------
+sub _runningPids {
+  my $name = shift;
+  my @pids = `/bin/ps -ef | /bin/grep $name | /bin/grep -v grep | /bin/awk '{print \$2}'`;
+  chomp(@pids);
+  # keep well-formed pids only
+  return grep(/^\d+$/, @pids);
+}
+
+
+#--------------------------------------------------------------------------------
+=head3    stop
+      This function gets called by the monitorctrl module when
+      xcatd stops.
+    Arguments:
+       none
+    Returns:
+       (return code, message) -- (0, "stopped") on success, (1, error text)
+       when snmptrapd.conf could not be restored or rewritten.
+=cut
+#--------------------------------------------------------------------------------
+sub stop {
+  print "snmpmon::stop called\n";
+
+  my $conf = "/usr/share/snmp/snmptrapd.conf";
+
+  if (-f "$conf.orig") {
+    # copy back the old one
+    if (!rename("$conf.orig", $conf)) {
+      return (1, "cannot restore $conf from $conf.orig: $!");
+    }
+  }
+  elsif (-f $conf) {
+    # if the file exists, delete all entries that have xcat_traphandler
+    my $in;
+    if (!open($in, '<', $conf)) {
+      return (1, "cannot read $conf: $!");
+    }
+    my @kept = grep(!/xcat_traphandler/, readline($in));
+    close($in);
+
+    my $out;
+    if (!open($out, '>', "$conf.unconfig")) {
+      return (1, "cannot write $conf.unconfig: $!");
+    }
+    print $out @kept;
+    if (!close($out)) {
+      return (1, "cannot write $conf.unconfig: $!");
+    }
+
+    # move it back to the snmptrapd.conf file.
+    if (!rename("$conf.unconfig", $conf)) {
+      return (1, "cannot replace $conf: $!");
+    }
+  }
+
+  # now check to see if the daemon is running.  If it is then we need to
+  # restart it with the new snmptrapd.conf file that no longer forwards
+  # traps to xcat_traphandler.
+  # The awk field reference must be escaped: an unescaped "$\" would be
+  # interpolated by Perl and awk would print the constant 2.
+  my @pids = `/bin/ps -ef | /bin/grep snmptrapd | /bin/grep -v grep | /bin/awk '{print \$2}'`;
+  chomp(@pids);
+  @pids = grep(/^\d+$/, @pids);
+  if (@pids) {
+    kill('KILL', @pids);
+    # start it up again!
+    system("/usr/sbin/snmptrapd");
+  }
+
+  return (0, "stopped");
+}
+
+
+
+
+#--------------------------------------------------------------------------------
+=head3    supportNodeStatusMon
+    This function is called by the monitorctrl module to check
+    if SNMP can help monitoring and returning the node status.
+    SNMP does not support this function.
+
+    Arguments:
+        none
+    Returns:
+         0 (node status monitoring is not supported)
+=cut
+#--------------------------------------------------------------------------------
+sub supportNodeStatusMon {
+  print "snmpmon::supportNodeStatusMon called\n";
+  return 0;
+}
+
+
+
+#--------------------------------------------------------------------------------
+=head3   startNodeStatusMon
+    This function is called by the monitorctrl module to tell
+    SNMP to start monitoring the node status and feed them back
+    to xCAT. SNMP does not have this support.
+
+    Arguments:
+      monservers --A hash reference keyed by the monitoring server nodes;
+                 each value is a ref to an array of [nodes, nodetype, status]
+                 arrays monitored by that server. So the format is:
+      {monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
+    Returns:
+        (return code, message) -- always (1, "This function is not supported.")
+
+=cut
+#--------------------------------------------------------------------------------
+sub startNodeStatusMon {
+  my ($rc, $msg) = (1, "This function is not supported.");
+
+  print "snmpmon::startNodeStatusMon called\n";
+  return ($rc, $msg);
+}
+
+
+#--------------------------------------------------------------------------------
+=head3   stopNodeStatusMon
+    Called by the monitorctrl module to tell SNMP to stop feeding the
+    node status info back to xCAT.
+    SNMP does not support this function.
+
+    Arguments:
+        none
+    Returns:
+        (return code, message) -- always (1, "This function is not supported.")
+=cut
+#--------------------------------------------------------------------------------
+sub stopNodeStatusMon {
+  my ($rc, $msg) = (1, "This function is not supported.");
+
+  print "snmpmon::stopNodeStatusMon called\n";
+  return ($rc, $msg);
+}
+
+
+#--------------------------------------------------------------------------------
+=head3    addNodes
+      This function adds the nodes into the SNMP domain.
+    Arguments:
+      nodes --nodes to be added. It is a hash reference keyed by the monitoring server
+        nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored
+        by the server. So the format is:
+          {monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
+      verbose -- verbose mode. 1 for yes, 0 for no.
+    Returns:
+       0
+=cut
+#--------------------------------------------------------------------------------
+sub addNodes {
+  # Skip the package name when invoked as a class method.
+  my $noderef = shift;
+  $noderef = shift if ($noderef =~ /xCAT_monitoring::snmpmon/);
+
+  print "snmpmon::addNodes get called\n";
+
+  return 0;
+}
+
+#--------------------------------------------------------------------------------
+=head3    removeNodes
+      This function removes the nodes from the SNMP domain.
+    Arguments:
+      nodes --nodes to be removed. It is a hash reference keyed by the monitoring server
+        nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored
+        by the server. So the format is:
+        {monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
+      verbose -- verbose mode. 1 for yes, 0 for no.
+    Returns:
+       0
+=cut
+#--------------------------------------------------------------------------------
+sub removeNodes {
+  # Skip the package name when invoked as a class method.
+  my $noderef = shift;
+  $noderef = shift if ($noderef =~ /xCAT_monitoring::snmpmon/);
+
+  print "snmpmon::removeNodes called\n";
+
+  return 0;
+}
+
diff --git a/xCAT-server-2.0/sbin/xcat_traphandler b/xCAT-server-2.0/sbin/xcat_traphandler
new file mode 100755
index 000000000..54d34513d
--- /dev/null
+++ b/xCAT-server-2.0/sbin/xcat_traphandler
@@ -0,0 +1,171 @@
+#!/usr/bin/env perl
+# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
+BEGIN
+{
+  # Use the default install prefix when XCATROOT is unset or empty.
+  $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
+}
+use lib "$::XCATROOT/lib/perl";
+use Sys::Syslog;
+use xCAT::Table;
+
+# The admin needs to create a mail alias called alerts:
+# put "alerts: emailadd,emailaddr.."
+# to /etc/aliases file
+# then run newaliases command
+my $MAILTO="alerts";
+
+# Input arrives on STDIN: the first line is read as the sending host name,
+# the second as its IP address, and every following line as "OID value".
+
+my $message  = "";
+my $briefmsg = "";
+my $node1;
+my $info;
+my $severity      = "";
+my $severity_type = "";
+
+# BladeCenter MM trap priority => [severity text, severity class]
+# Critical Alert(0), Major(1), Non-Critical Alert(2), System Alert(4),
+# Recovery Alert(8), Informational Only Alert(255)
+my %PRIORITY = (
+  0   => ["Critical Alert",      "Critical"],
+  1   => ["Major Alert",         "Critical"],
+  2   => ["Non-Critical Alert",  "Warning"],
+  4   => ["System Alert",        "Warning"],
+  8   => ["Recovery Alert",      "Warning"],
+  255 => ["Informational Alert", "Informational"],
+);
+
+
+my $host = readline(*STDIN);
+$host = "" unless defined($host);
+chomp($host);
+my $ip = readline(*STDIN);
+$ip = "" unless defined($ip);
+chomp($ip);
+
+while (defined(my $pair = readline(*STDIN))) {
+  chomp($pair);
+  next if ($pair eq "");          # nothing to record for an empty line
+  my @a = split(/ /, $pair);
+  my $oid = shift @a;
+  my $value = join(' ', @a);
+
+  $message .= " $oid=$value\n";
+
+  #for BladeCenter MM traps, create a brief message
+  if ($oid =~ /BLADESPPALT-MIB::spTrapAppId/) {
+    $briefmsg .= " App ID: $value\n";
+  }
+  elsif (($oid =~ /BLADESPPALT-MIB::spTrapAppType/) && ($value)) {
+    $briefmsg .= " Alert Type: $value\n";
+  }
+  elsif ($oid =~ /BLADESPPALT-MIB::spTrapMsgText/) {
+    $briefmsg .= " Message: $value\n";
+  }
+  elsif ($oid =~ /BLADESPPALT-MIB::spTrapBladeName/) {
+    # The blade name arrives quoted; only a non-empty quoted name is used.
+    # The capture is consulted only when the match itself succeeded.
+    if (($value =~ /^\"(.*)\"/) && ($1)) {
+      $node1 = $1;
+      $briefmsg .= " Blade Name: $value\n";
+    }
+  }
+  elsif (($oid =~ /BLADESPPALT-MIB::spTrapSourceId/)) {
+    $briefmsg .= " Error Source=$value\n";
+  }
+  elsif ($oid =~ /BLADESPPALT-MIB::spTrapPriority/) {
+    # Use the last integer in the value so that both a bare number and a
+    # decorated form (type prefix or enum label) are understood.  A value
+    # without a known priority leaves the severity empty instead of being
+    # compared numerically (which would treat any text as 0 = Critical).
+    if (($value =~ /(\d+)\D*$/) && exists($PRIORITY{$1})) {
+      ($severity, $severity_type) = @{$PRIORITY{$1}};
+    }
+  }
+
+  # TODO: special handling for IPMI and RSA 2
+}
+
+if ($node1) {$info= getMoreInfo($node1);}
+
+my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
+my $datetime=sprintf "%2d-%02d-%04d %02d:%02d:%02d", $mon+1,$mday,$year+1900,$hour,$min,$sec;
+my $head="SNMP $severity received from $host($ip) on $datetime\n$briefmsg\n";
+my $middle="Trap details:\n$message\n";
+my $end="";
+if ($info) {
+  $end ="Additonal Info from xCAT:\n$info\n";
+}
+
+#TODO: decide responses based on severity. settings in monitoring tb.
+#email the full message to the alerts alias
+# The trap text comes from the network, so it is written to the mail
+# command's STDIN and the command is run without a shell (list-form open).
+my $subject="$severity_type: Cluster SNMP Alert!";
+if (open(my $mail, '|-', 'mail', '-s', $subject, $MAILTO)) {
+  print $mail "$head$middle$end\n";
+  close($mail) or warn "xcat_traphandler: mail command failed\n";
+}
+else {
+  warn "xcat_traphandler: cannot run mail: $!\n";
+}
+
+# TODO: log the brief message from the BladeCenter MM to the syslog. For other traps, log all.
+
+
+
+
+
+#--------------------------------------------------------------------------------
+=head3    getMoreInfo
+      This function returns the node module/type, serial number, position etc.
+      The vpd table is consulted first; when it has nothing for the node the
+      output of "rinv <node> all" is used instead.  The position comes from
+      the nodepos table.
+    Arguments:
+      node-- name of the node.
+    Returns:
+      A string with node info ready to display, or "" when nothing is known.
+=cut
+#--------------------------------------------------------------------------------
+sub getMoreInfo {
+  my $node=shift;
+  my ($pos, $vpd) = ("", "");
+
+  #get module name and serial number from the xCAT DB.
+  my $table=xCAT::Table->new("vpd", -create =>1);
+  if ($table) {
+    my ($ref) = $table->getAttribs({'node' => $node}, ('serial', 'mtm'));
+    if ($ref) {
+      if($ref->{mtm}) { $vpd .= " Type/Mudule: ". $ref->{mtm} ."\n"; }
+      if($ref->{serial}) { $vpd .= " Serial Number: ". $ref->{serial} ."\n";}
+    }
+    $table->close();
+  }
+
+  # get the info from rinv command if nothing in the vpd table
+  if (!$vpd) {
+    # The node name originates from the trap, so rinv is run without a
+    # shell and its output is filtered here.
+    my $rinv;
+    if (open($rinv, '-|', "$::XCATROOT/bin/rinv", $node, 'all')) {
+      while (defined(my $line = readline($rinv))) {
+        chomp($line);
+        next unless ($line =~ /model|serial/i);
+        next if ($line =~ /Univ/);
+        # "node: label: value" becomes "label: value"
+        $line =~ s/^(.*)\:(.*)\:(.*)$/$2: $3/;
+        $vpd .= " $line\n";
+      }
+      close($rinv);
+    }
+  }
+
+
+  #get the position
+  my $table1=xCAT::Table->new("nodepos", -create =>1);
+  if ($table1) {
+    my ($ref1) = $table1->getAttribs({'node' => $node}, ('rack', 'u', 'chassis', 'slot', 'room'));
+    if ($ref1) {
+      if($ref1->{room}) { $pos .= " Room: ". $ref1->{room} ."\n"; }
+      if($ref1->{rack}) { $pos .= " Rack: ". $ref1->{rack} ."\n"; }
+      if($ref1->{u}) { $pos .= " Vertial position: ". $ref1->{u} ."\n"; }
+      if($ref1->{chassis}) { $pos .= " Chassis: ". $ref1->{chassis} ."\n"; }
+      if($ref1->{slot}) { $pos .= " Slot: ". $ref1->{slot} ."\n"; }
+    }
+    $table1->close();
+  }
+
+  if (($pos) || ($vpd)) {
+    return " Node: $node\n$vpd$pos";
+  }
+
+  return "";
+}
+
+
+
+
+
+