SNMP monitoring support. more to come.

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@626 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2008-02-28 20:21:35 +00:00
parent 9aeb214c33
commit e77ad65c55
2 changed files with 473 additions and 0 deletions

View File

@ -0,0 +1,302 @@
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
package xCAT_monitoring::snmpmon;
# BEGIN runs at compile time, so $::XCATROOT is already set when the
# "use lib" line below (also evaluated at compile time) needs it.
BEGIN
{
# Take the xCAT install root from the environment, falling back to /opt/xcat.
$::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
# Make the xCAT perl libraries under the install root loadable.
use lib "$::XCATROOT/lib/perl";
use IO::File;
# Trace message printed to STDOUT every time this module is loaded.
print "xCAT_monitoring::snmpmon loaded\n";
# True value so that require/use of this module succeeds.
1;
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring::snmpmon
=head2 Package Description
xCAT monitoring plugin package to handle SNMP monitoring.
=cut
#-------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
=head3 start
This function gets called by the monitorctrl module
when xcatd starts.
Arguments:
monservers --A hash reference keyed by the monitoring server nodes
and each value is a ref to an array of [nodes, nodetype, status] arrays
monitored by the server. So the format is:
{monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
Returns:
(return code, message)
=cut
#--------------------------------------------------------------------------------
# Entry point called by the monitorctrl module when xcatd starts.
# Verifies that a net-snmp package is installed (via rpm) and, if so,
# configures and restarts the SNMP daemons through configSNMP().
#
# Arguments:
#   monservers -- hash reference of monitoring servers (currently unused here).
#                 When called as a class method, the leading package name
#                 argument is skipped.
# Returns:
#   (return code, message) -- (0, "started") on success,
#   (1, "net-snmp is not installed") if the package is missing, or whatever
#   non-zero pair configSNMP() reported.
sub start {
    print "snmpmon::start called\n";

    # Lexical, so the argument does not leak into a package global.
    my $noderef = shift;
    if ($noderef =~ /xCAT_monitoring::snmpmon/) {
        # Invoked as xCAT_monitoring::snmpmon->start(...): drop the class name.
        $noderef = shift;
    }

    # unless we are running on linux, exit.
    #unless($^O eq "linux"){
    #    exit;
    # }

    # check supported snmp package
    my @snmpPkg      = `/bin/rpm -qa | grep snmp`;
    my $pkginstalled = grep(/net-snmp/, @snmpPkg);
    if (!$pkginstalled) {
        return (1, "net-snmp is not installed");
    }

    my ($ret, $err) = configSNMP();
    if ($ret != 0) {
        return ($ret, $err);
    }

    return (0, "started");
}
#--------------------------------------------------------------------------------
=head3 configSNMP
This function puts xcat_traphandler into the snmptrapd.conf file and
restarts snmptrapd with the new configuration.
Arguments:
none.
Returns:
(return code, message)
=cut
#--------------------------------------------------------------------------------
# Registers xcat_traphandler in /usr/share/snmp/snmptrapd.conf and restarts
# snmptrapd so that it picks up the new configuration; also starts snmpd if
# no snmpd process is found.
#
# If the configuration file already exists and does not mention
# xcat_traphandler, it is backed up to snmptrapd.conf.orig and rewritten with
# "execute" added to the public authCommunity line plus a traphandle line.
# If it does not exist, a minimal file is created.
#
# Arguments:
#   none.
# Returns:
#   (return code, message) -- (0, "started") on success, (1, reason) when the
#   configuration file could not be read or written.
sub configSNMP {
    my $conf          = "/usr/share/snmp/snmptrapd.conf";
    my $traphandle    = "traphandle default $::XCATROOT/sbin/xcat_traphandler\n";
    my $authcommunity = "authCommunity execute public\n";

    if (-f $conf) {
        # if the file exists and has references to xcat_traphandler then
        # there is nothing that needs to be done.
        `/bin/grep xcat_traphandler $conf > /dev/null`;

        # a non-zero grep exit code means there is no xcat_traphandler
        # reference and we need to put one in.
        if ($? >> 8) {
            # back up the original file.
            `/bin/cp -f $conf $conf.orig`;

            open(my $in, '<', $conf)
              or return (1, "Cannot read $conf: $!");
            my $out;
            unless (open($out, '>', "$conf.tmp")) {
                my $err = $!;
                close($in);
                return (1, "Cannot write $conf.tmp: $err");
            }

            my $found = 0;
            while (my $line = <$in>) {
                if ($line =~ /\s*authCommunity.*public/) {
                    $found = 1;
                    if ($line !~ /\s*authCommunity\s*.*execute.*public/) {
                        # modify it to have execute if found
                        $line =~ s/authCommunity\s*(.*)\s* public/authCommunity $1,execute public/;
                    }
                }
                print $out $line;
            }
            if (!$found) {
                print $out $authcommunity;    # add new one if not found
            }

            # now add the new traphandle command:
            print $out $traphandle;
            close($in);
            close($out);
            `mv -f $conf.tmp $conf`;
        }
    }
    else {    # The snmptrapd.conf file does not exist
        # create the file:
        open(my $out, '>', $conf)
          or return (1, "Cannot create $conf: $!");
        print $out $authcommunity;
        print $out $traphandle;
        close($out);
    }

    # TODO: put the mib files to /usr/share/snmp/mibs

    # get the PIDs of any currently running snmptrapd, stop them and start
    # snmptrapd again so that it reads our new snmptrapd.conf file.
    # ps may report several processes, so each PID is handled separately
    # instead of interpolating a multi-line string into a shell command.
    my @trapd_pids = grep { /^\d+$/ } split(/\s+/,
        `/bin/ps -ef | /bin/grep snmptrapd | /bin/grep -v grep | /bin/awk '{print \$2}'`);
    if (@trapd_pids) {
        kill('KILL', @trapd_pids);
    }

    # start it up again!
    system("/usr/sbin/snmptrapd -m ALL");

    # get the PID of the currently running snmpd if it is running.
    # if it's running then we just leave. Otherwise we assume that it
    # isn't running, and start it up.
    my @snmpd_pids = grep { /^\d+$/ } split(/\s+/,
        `/bin/ps -ef | /bin/grep snmpd | /bin/grep -v grep | /bin/awk '{print \$2}'`);
    unless (@snmpd_pids) {
        system("/usr/sbin/snmpd");
    }

    return (0, "started");
}
#--------------------------------------------------------------------------------
=head3 stop
This function gets called by the monitorctrl module when
xcatd stops.
Arguments:
none
Returns:
(return code, message)
=cut
#--------------------------------------------------------------------------------
# Called by the monitorctrl module when xcatd stops.
# Restores the snmptrapd configuration that was in place before
# xcat_traphandler was registered (or strips the xcat_traphandler lines when
# no backup exists) and restarts snmptrapd if it was running.
#
# Arguments:
#   none
# Returns:
#   (return code, message) -- always (0, "stopped").
sub stop {
    print "snmpmon::stop called\n";

    my $conf = "/usr/share/snmp/snmptrapd.conf";

    if (-f "$conf.orig") {
        # copy back the old one
        `mv -f $conf.orig $conf`;
    }
    elsif (-f $conf) {
        # if the file exists, delete all entries that have xcat_traphandler
        my $cmd = "grep -v xcat_traphandler $conf ";
        $cmd .= "> $conf.unconfig ";
        `$cmd`;

        # move it back to the snmptrapd.conf file.
        `mv -f $conf.unconfig $conf`;
    }

    # now check to see if the daemon is running. If it is then we need to
    # restart it with the new snmptrapd.conf file that no longer forwards
    # traps to xcat_traphandler.
    # The awk field reference must be written \$2: an unescaped "$\" inside
    # backticks is interpolated by Perl as its output record separator and
    # awk would then print the constant 2 instead of the PID column.
    my @pids = grep { /^\d+$/ } split(/\s+/,
        `/bin/ps -ef | /bin/grep snmptrapd | /bin/grep -v grep | /bin/awk '{print \$2}'`);
    if (@pids) {
        kill('KILL', @pids);

        # start it up again!
        system("/usr/sbin/snmptrapd");
    }

    return (0, "stopped");
}
#--------------------------------------------------------------------------------
=head3 supportNodeStatusMon
This function is called by the monitorctrl module to check
if SNMP can help monitoring and returning the node status.
SNMP does not support this function.
Arguments:
none
Returns:
0 (SNMP cannot provide node status monitoring)
=cut
#--------------------------------------------------------------------------------
# Tells the monitorctrl module whether this plugin can monitor node status.
# SNMP cannot, so the answer is always 0.
sub supportNodeStatusMon {
    my $supported = 0;
    print "snmpmon::supportNodeStatusMon called\n";
    return $supported;
}
#--------------------------------------------------------------------------------
=head3 startNodeStatusMon
This function is called by the monitorctrl module to tell
SNMP to start monitoring the node status and feed them back
to xCAT. SNMP does not have this support.
Arguments:
monservers --A hash reference keyed by the monitoring server nodes
and each value is a ref to an array of [nodes, nodetype, status] arrays
monitored by the server. So the format is:
{monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
Returns:
(return code, message)
=cut
#--------------------------------------------------------------------------------
# Node status monitoring is not available through SNMP, so this only logs
# the call and reports failure as (1, message).
sub startNodeStatusMon {
    my $rc  = 1;
    my $msg = "This function is not supported.";
    print "snmpmon::startNodeStatusMon called\n";
    return ($rc, $msg);
}
#--------------------------------------------------------------------------------
=head3 stopNodeStatusMon
This function is called by the monitorctrl module to tell
SNMP to stop feeding the node status info back to xCAT.
SNMP does not support this function.
Arguments:
none
Returns:
(return code, message)
=cut
#--------------------------------------------------------------------------------
# Counterpart of startNodeStatusMon: SNMP has no node status feed to stop,
# so this only logs the call and reports failure as (1, message).
sub stopNodeStatusMon {
    my $rc  = 1;
    my $msg = "This function is not supported.";
    print "snmpmon::stopNodeStatusMon called\n";
    return ($rc, $msg);
}
#--------------------------------------------------------------------------------
=head3 addNodes
This function adds the nodes into the SNMP domain.
Arguments:
nodes --nodes to be added. It is a hash reference keyed by the monitoring server
nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored
by the server. So the format is:
{monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
verbose -- verbose mode. 1 for yes, 0 for no.
Returns:
none
=cut
#--------------------------------------------------------------------------------
# Placeholder for adding nodes to the SNMP domain: it unpacks its argument,
# logs the call and returns 0.
sub addNodes {
    my $noderef = shift;

    # When called as a class method the first argument is the package name;
    # the node hash reference is then the next one.
    $noderef = shift if $noderef =~ /xCAT_monitoring::snmpmon/;

    print "snmpmon::addNodes get called\n";
    return 0;
}
#--------------------------------------------------------------------------------
=head3 removeNodes
This function removes the nodes from the SNMP domain.
Arguments:
nodes --nodes to be removed. It is a hash reference keyed by the monitoring server
nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored
by the server. So the format is:
{monserver1=>[['node1', 'osi', 'active'], ['node2', 'switch', 'booting']...], ...}
verbose -- verbose mode. 1 for yes, 0 for no.
Returns:
none
=cut
#--------------------------------------------------------------------------------
# Placeholder for removing nodes from the SNMP domain: it unpacks its
# argument, logs the call and returns 0.
sub removeNodes {
    my $noderef = shift;

    # When called as a class method the first argument is the package name;
    # the node hash reference is then the next one.
    $noderef = shift if $noderef =~ /xCAT_monitoring::snmpmon/;

    print "snmpmon::removeNodes called\n";
    return 0;
}

View File

@ -0,0 +1,171 @@
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
# BEGIN runs at compile time, so $::XCATROOT is already set when the
# "use lib" line that follows needs it.
BEGIN
{
# Take the xCAT install root from the environment, falling back to /opt/xcat.
$::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use Sys::Syslog;
use xCAT::Table;
# Main body of the trap handler. It reads one trap from STDIN (first line:
# sending host, second line: its IP, remaining lines: "OID value" pairs),
# builds a readable summary and mails it to the alerts alias.
#
# The admin needs to create a mail alias called "alerts":
# put "alerts: emailaddr,emailaddr.." into the /etc/aliases file,
# then run the newaliases command.
my $MAILTO="alerts";
my $message='';
my $briefmsg='';
my $pair;
my $node1;
my $info;
my $severity_type;
my $severity;

my $host=<STDIN>;
chomp($host);
my $ip=<STDIN>;
chomp($ip);

while ($pair=<STDIN>) {
    chomp($pair);
    # Everything after the first space-separated token is the value.
    my @a=split(/ /, $pair);
    my $oid=shift @a;
    my $value=join(' ', @a);
    $message .= " $oid=$value\n";

    # for BladeCenter MM traps, create a brief message
    if ($oid =~ /BLADESPPALT-MIB::spTrapAppId/) {
        $briefmsg .= " App ID: $value\n";
    }
    elsif (($oid =~ /BLADESPPALT-MIB::spTrapAppType/) && ($value)) {
        $briefmsg .= " Alert Type: $value\n";
    }
    elsif ($oid =~ /BLADESPPALT-MIB::spTrapMsgText/) {
        $briefmsg .= " Message: $value\n";
    }
    elsif ($oid =~ /BLADESPPALT-MIB::spTrapBladeName/) {
        # The blade name arrives wrapped in double quotes; only trust $1
        # when the match itself succeeded.
        if (($value =~ /^\"(.*)\"/) && ($1)) {
            $briefmsg .= " Blade Name: $value\n";
            $node1=$1;
        }
    }
    elsif (($oid =~ /BLADESPPALT-MIB::spTrapSourceId/)) {
        $briefmsg .= " Error Source=$value\n";
    }
    elsif ($oid =~ /BLADESPPALT-MIB::spTrapPriority/) {
        # Critical Alert(0), Major(1), Non-Critical Alert(2), System Alert(4),
        # Recovery Alert(8), Informational Only Alert(255)
        # NOTE(review): the comparison is numeric, so this assumes the value
        # is delivered as a bare integer -- confirm against the snmptrapd
        # output format in use.
        if ($value==0) {
            $severity="Critical Alert";
            $severity_type="Critical";
        } elsif ($value==1) {
            $severity="Major Alert";
            $severity_type="Critical";
        } elsif ($value==2) {
            $severity="Non-Critical Alert";
            $severity_type="Warning";
        } elsif ($value==4) {
            $severity="System Alert";
            $severity_type="Warning";
        } elsif ($value==8) {
            $severity="Recovery Alert";
            $severity_type="Warning";
        } elsif ($value==255) {
            $severity="Informational Alert";
            $severity_type="Informational";
        }
    }
    # TODO: special handling for IPMI and RSA 2
}

# Look up extra details in the xCAT database when the trap named a blade.
if ($node1) {$info= getMoreInfo($node1);}

my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
my $datetime=sprintf "%2d-%02d-%04d %02d:%02d:%02d", $mon+1,$mday,$year+1900,$hour,$min,$sec;
my $head="SNMP $severity received from $host($ip) on $datetime\n$briefmsg\n";
my $middle="Trap details:\n$message\n";
my $end='';
if ($info) {
    $end ="Additonal Info from xCAT:\n$info\n";
}

#TODO: decide responses based on severity. settings in monitoring tb.

# email the full message to the alerts alias.
# The trap text comes from the network, so it is written to mail's STDIN
# through a list-form pipe (no shell involved) instead of being spliced
# into an "echo '...' | mail" command line.
my $subject="$severity_type: Cluster SNMP Alert!";
if (open(my $mail, '|-', 'mail', '-s', $subject, $MAILTO)) {
    print $mail "$head$middle$end\n";
    close($mail);
} else {
    warn "xcat_traphandler: cannot run mail: $!\n";
}

# TODO: log the brief message from the BladeCenter MM to the syslog. For other traps, log all.
#--------------------------------------------------------------------------------
=head3 getMoreInfo
This function returns the node module/type, serial number, position etc.
Arguments:
node-- name of the node.
Returns:
A string with node info ready to display.
=cut
#--------------------------------------------------------------------------------
# Collects descriptive information about a node for inclusion in the alert
# mail: machine type/model and serial number from the xCAT "vpd" table
# (falling back to the output of the rinv command when the table has
# nothing), and the physical location from the "nodepos" table.
#
# Arguments:
#   node -- name of the node.
# Returns:
#   A display-ready string starting with " Node: <name>", or "" when no
#   information was found.
sub getMoreInfo {
    my $node=shift;
    # Both accumulators must be lexical; "my $pos,$vpd;" would declare only
    # $pos and leave $vpd as a package global.
    my $pos='';
    my $vpd='';

    # get module name and serial number from the xCAT DB.
    my $table=xCAT::Table->new("vpd", -create =>1);
    if ($table) {
        (my $ref) = $table->getAttribs({'node' => $node}, ('serial', 'mtm'));
        if ($ref) {
            if($ref->{mtm}) { $vpd .= " Type/Mudule: ". $ref->{mtm} ."\n"; }
            if($ref->{serial}) { $vpd .= " Serial Number: ". $ref->{serial} ."\n";}
        }
        $table->close();
    }

    # get the info from rinv command if nothing in the vpd table
    if (!$vpd) {
        # The node name originates from the received trap, so single-quote
        # it for the shell (embedded quotes become '\'').
        (my $quoted_node = $node) =~ s/'/'\\''/g;
        my $result=`$::XCATROOT/bin/rinv '$quoted_node' all 2>&1 | egrep -i '(model|serial)' | grep -v Univ`;
        if ($? == 0) {#success
            chomp($result);
            foreach my $line (split(/\n/, $result)) {
                # Drop the first colon-separated field, keeping the last two
                # as "label: value".
                $line =~ s/^(.*)\:(.*)\:(.*)$/$2: $3/;
                $vpd .= " $line\n";
            }
        }
    }

    # get the position
    my $table1=xCAT::Table->new("nodepos", -create =>1);
    if ($table1) {
        (my $ref1) = $table1->getAttribs({'node' => $node}, ('rack', 'u', 'chassis', 'slot', 'room'));
        if ($ref1) {
            if($ref1->{room}) { $pos .= " Room: ". $ref1->{room} ."\n"; }
            if($ref1->{rack}) { $pos .= " Rack: ". $ref1->{rack} ."\n"; }
            if($ref1->{u}) { $pos .= " Vertial position: ". $ref1->{u} ."\n"; }
            if($ref1->{chassis}) { $pos .= " Chassis: ". $ref1->{chassis} ."\n"; }
            if($ref1->{slot}) { $pos .= " Slot: ". $ref1->{slot} ."\n"; }
        }
        $table1->close();
    }

    if (($pos) || ($vpd)) {
        return " Node: $node\n$vpd$pos";
    }
    return "";
}