event batching support for node status monitoring using RMC

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2844 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2009-03-06 16:30:03 +00:00
parent c7adfdeb74
commit a4330ee00e
6 changed files with 309 additions and 83 deletions

View File

@ -613,25 +613,52 @@ sub startNodeStatusMon {
reportError($retmsg, $callback);
}
if (!$isSV) {
#start monitoring the status of mn's immediate children
my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
$retcode=$?;
$retmsg="Error start node status monitoring: $result";
reportError($retmsg, $callback);
}
#start monitoring the status of mn's grandchildren via their service nodes
$result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
$retcode=$?;
$retmsg="Error start node status monitoring: $result";
reportError($retmsg, $callback);
}
#get rsct version
my $result=`/usr/sbin/rsct/install/bin/ctversion`;
my $rsct_ver="0.0.0";
if (!$?) {
chomp($result);
my @tempa=split(/ /, $result);
if (@tempa>1) {
$rsct_ver=$tempa[1];
}
}
#get setting from the monsetting table
my $batch=0;
if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) {
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
my $batch_string=$settings{'nodeusebatch'};
if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;}
}
if ($batch) {
my $result=`startcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`;
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
$retcode=$?;
$retmsg="Error start node status monitoring: $result";
reportError($retmsg, $callback);
}
} else {
if (!$isSV) {
#start monitoring the status of mn's immediate children
my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
$retcode=$?;
$retmsg="Error start node status monitoring: $result";
reportError($retmsg, $callback);
}
#start monitoring the status of mn's grandchildren via their service nodes
$result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
$retcode=$?;
$retmsg="Error start node status monitoring: $result";
reportError($retmsg, $callback);
}
}
}
return ($retcode, $retmsg);
}
@ -740,24 +767,54 @@ sub stopNodeStatusMon {
my $retmsg="";
my $isSV=xCAT::Utils->isServiceNode();
if ($isSV) { return ($retcode, $retmsg); }
my $localhostname=hostname();
#stop monitoring the status of mn's immediate children
my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-264/)) { #stoped
$retcode=$?;
$retmsg="Error stop node status monitoring: $result";
reportError($retmsg, $callback);
#get rsct version
my $result=`/usr/sbin/rsct/install/bin/ctversion`;
my $rsct_ver="0.0.0";
if (!$?) {
chomp($result);
my @tempa=split(/ /, $result);
if (@tempa>1) {
$rsct_ver=$tempa[1];
}
}
#get setting from the monsetting table
my $batch=0;
if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) {
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
my $batch_string=$settings{'nodeusebatch'};
if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;}
}
if ($batch) {
my $result=`stopcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`;
if (($?) && ($result !~ /2618-264/)) { #stoped
$retcode=$?;
$retmsg="Error stop node status monitoring: $result";
reportError($retmsg, $callback);
}
} else {
if ($isSV) { return ($retcode, $retmsg); }
#stop monitoring the status of mn's immediate children
my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-264/)) { #stoped
$retcode=$?;
$retmsg="Error stop node status monitoring: $result";
reportError($retmsg, $callback);
}
#stop monitoring the status of mn's grandchildren via their service nodes
$result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-264/)) { #stoped
$retcode=$?;
$retmsg="Error stop node status monitoring: $result";
reportError($retmsg, $callback);
}
}
#stop monitoring the status of mn's grandchildren via their service nodes
$result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
if (($?) && ($result !~ /2618-264/)) { #stoped
$retcode=$?;
$retmsg="Error stop node status monitoring: $result";
reportError($retmsg, $callback);
}
return ($retcode, $retmsg);
}
@ -1277,7 +1334,9 @@ sub getDescription {
monitoring domain for RMC during node deployment time.
Settings:
rfanout -- indicating the fanout number for configuring or deconfiguring
remote nodes.";
remote nodes.
nodeusebatch -- use RMC event batching function when doing node status monitoring.
";
}
#--------------------------------------------------------------------------------

View File

@ -0,0 +1,29 @@
#!/usr/bin/perl
# RMC condition definition used for batched node status monitoring.
# Stores the 'NodeReachability_Batch' entry in %RES::Condition. When the
# RSCT_VER environment variable is set and xCAT::Utils->CheckVersion returns
# a negative value for it against "2.3.5.0" (presumably: RSCT is older than
# 2.3.5.0 -- confirm against CheckVersion), nothing is defined.
BEGIN {
    # Use the default install prefix when XCATROOT is unset or empty.
    $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use xCAT::Utils;

# NOTE(review): exit() ends the whole process; if this file is pulled in with
# require/do rather than run as a program, the loader stops too -- confirm.
if (exists($ENV{RSCT_VER})
    && xCAT::Utils->CheckVersion($ENV{RSCT_VER}, "2.3.5.0") < 0)
{
    exit(0);
}

$RES::Condition{'NodeReachability_Batch'} = {
    'Name'                   => 'NodeReachability_Batch',
    'ResourceClass'          => 'IBM.MngNode',
    'EventExpression'        => 'Status!=Status@P',
    'EventDescription'       => 'An event will be generated when a status changes',
    'ManagementScope'        => '1',
    'EventBatchingInterval'  => '60',
    'EventBatchingMaxEvents' => '200',
    'Severity'               => '2',
};

1;

View File

@ -0,0 +1,20 @@
#!/usr/bin/perl
# RMC event response definition used for batched node status monitoring.
# Stores the 'UpdatexCATNodeStatus_Batch' entry in %RES::EventResponse. When
# the RSCT_VER environment variable is set and xCAT::Utils->CheckVersion
# returns a negative value for it against "2.3.5.0" (presumably: RSCT is older
# than 2.3.5.0 -- confirm against CheckVersion), nothing is defined.
BEGIN {
    # Use the default install prefix when XCATROOT is unset or empty.
    $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use xCAT::Utils;

# NOTE(review): exit ends the whole process; if this file is pulled in with
# require/do rather than run as a program, the loader stops too -- confirm.
if (exists($ENV{RSCT_VER})
    && xCAT::Utils->CheckVersion($ENV{RSCT_VER}, "2.3.5.0") < 0)
{
    exit 0;
}

$RES::EventResponse{'UpdatexCATNodeStatus_Batch'} = {
    'Name'          => 'UpdatexCATNodeStatus_Batch',
    'Locked'        => '0',
    'EventBatching' => '1',
    # The action runs /opt/xcat/sbin/rmcmon/updatexcatnodestatus; the meaning
    # of the remaining positional fields is not visible from this file.
    'Actions'       => '{[updatexCAT,{127},{0},{86400},/opt/xcat/sbin/rmcmon/updatexcatnodestatus,3,0,0,0,{},0]}',
};

1;

View File

@ -0,0 +1,39 @@
#!/usr/bin/env perl
# Simple RMC event response script.
#
# Appends a timestamped separator line to /var/log/logevent_simple.log. When
# the event is a batched one (ERRM_COND_BATCH is true and ERRM_COND_BATCH_NUM
# is greater than 0), it also reads the first line of the event detail file
# named by ERRM_EVENT_DETAIL_FILE, takes the text after the first '=' as the
# number of events, and logs it as "Total events=...".
#
# Environment variables read:
#   XCATROOT, ERRM_ER_NAME, ERRM_COND_NAME, ERRM_COND_BATCH,
#   ERRM_COND_BATCH_NUM, ERRM_EVENT_DETAIL_FILE
#
# Exit status: 0 on success, 1 when the event detail file is not specified,
# missing, or cannot be opened; dies if the log file cannot be opened.
BEGIN
{
    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use strict;

# Send $msg through xCAT::MsgUtils->message with the 'S' flag and exit with
# status 1. The module is loaded here on demand because the script never
# loaded it before calling it.
sub report_and_exit {
    my ($msg) = @_;
    require xCAT::MsgUtils;
    xCAT::MsgUtils->message('S', $msg);
    exit(1);
}

my $logfile = "/var/log/logevent_simple.log";
open(my $log_fh, '>>', $logfile) or die("cannot open the file\n");

my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
printf $log_fh "\n-----------%2d-%02d-%04d %02d:%02d:%02d-----------\n",
    $mon + 1, $mday, $year + 1900, $hour, $min, $sec;

my $respname  = $ENV{ERRM_ER_NAME};
my $cond_name = $ENV{ERRM_COND_NAME};

my $batch = 0;
if (exists($ENV{ERRM_COND_BATCH})) { $batch = $ENV{ERRM_COND_BATCH}; }

if ($batch) {
    if ($ENV{ERRM_COND_BATCH_NUM} > 0) {
        # NOTE(review): the "logeventtoxcat:" prefix in these messages looks
        # copied from another script -- confirm whether it should name this one.

        #check if event detail file exist
        if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})) {
            report_and_exit("logeventtoxcat: no event detail file specified in the response $respname for condition $cond_name.\n");
        }

        my $filename = $ENV{ERRM_EVENT_DETAIL_FILE};
        if (! -f $filename) {
            report_and_exit("logeventtoxcat: cannot find event detail file $filename in response $respname for condition $cond_name.\n");
        }

        # Three-argument open so the path from the environment cannot inject
        # an open mode; a failure is reported instead of silently ignored.
        open(my $detail_fh, '<', $filename)
            or report_and_exit("logeventtoxcat: cannot open event detail file $filename in response $respname for condition $cond_name: $!\n");
        my $line1 = readline($detail_fh);
        close($detail_fh);

        # The count is whatever follows the first '=' on the first line; the
        # trailing newline is kept, as in the original output format.
        my @aTemp      = split(/=/, $line1);
        my $num_events = $aTemp[1];
        print $log_fh "Total events=$num_events\n\n";
    }
}

close($log_fh);
# A file-level "return" is a runtime error in a main script; exit instead.
exit 0;

View File

@ -63,7 +63,7 @@ if (!$batch) { #handle single event
my $count;
for ($count = 1; $count <= $num_events; $count++) {
my $content=`sed -n "/Event $count/, /(null)/ p" $filename`;
my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`;
my @content_array=split(/\n/, $content);
pop(@content_array); #get rid of last line

View File

@ -13,62 +13,141 @@ use strict;
use Getopt::Std;
use POSIX qw(strftime);
my $respname=$ENV{ERRM_ER_NAME};
my $cond_name=$ENV{ERRM_COND_NAME};
my $node;
my $status;
my $batch=0;
if (exists($ENV{ERRM_COND_BATCH})) { $batch=$ENV{ERRM_COND_BATCH}; }
if ($cond_name eq "NodeReachability") {
$node=$ENV{ERRM_RSRC_NAME};
$status=$ENV{ERRM_VALUE};
} elsif ($cond_name eq "NodeReachability_H") {
# Parse the ERRM_VALUE attribute, which will contain the
# LastEvent structured data variable from the Condition class
# The fields in this structured data variable are documented below where we parse them out.
my $event = $ENV{ERRM_VALUE};
$event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them
if (!$batch) {
my $node;
my $status;
if ($cond_name eq "NodeReachability") {
$node=$ENV{ERRM_RSRC_NAME};
$status=$ENV{ERRM_VALUE};
} elsif ($cond_name eq "NodeReachability_H") {
# Parse the ERRM_VALUE attribute, which will contain the
# LastEvent structured data variable from the Condition class
# The fields in this structured data variable are documented below where we parse them out.
my $event = $ENV{ERRM_VALUE};
$event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them
# This parses the LastEvent
my ( # split the SD into the following fields:
$Occurred, # One if the condition has been triggered
$ErrNum, # Non-zero if there was in error in the event registration
$ErrMsg, # The string msg related to ErrNum
$EventFlags, # Bit mask giving some additional info about the event
$EventTime, # Time of event expressed in seconds since 1/1/1970
$EventTimeMicros, # Number of microseconds past EventTime
$ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered
$NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4),
# this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability),
# this will be the FMS.
$NumAttrs, # Number of attr values from the resource returned in this event
$NumAttrsInExpr, # How many of the above were attributes in the event expression
$IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added
# to LastEvent, this will be the element right after this one.
$AttrArray # This list of attribute names, types, and values
) = split(/,/, $event, 12);
my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list
my $j = 0; # index into attrArray
for (my $i=0; $i<$NumAttrs; $i++) {
my $attrName = $attrArray[$j++];
my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays.
my $attrValue = $attrArray[$j++];
if ($attrName eq '"Name"') { $node = $attrValue; }
if ($attrName eq '"Status"') { $status = $attrValue; }
}
} else {
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
exit 1;
}
my $status_string;
if ($status == 1) { $status_string="alive"; }
else { $status_string="unreachable"; }
my $result=`$::XCATROOT/sbin/chtab node=$node nodelist.status=$status_string 2>&1`;
my $code=$?;
if ($code) {
`logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`;
exit $code;
}
} else { #batch event
if ($cond_name ne "NodeReachability_Batch") {
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
exit 1;
}
if ($ENV{ERRM_COND_BATCH_NUM} > 0) {
#check if event detail file exist
if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})){
xCAT::MsgUtils->message('S', "logeventtoxcat: no event detail file specified in the response $respname for condition $cond_name.\n");
exit (1);
}
my $filename=$ENV{ERRM_EVENT_DETAIL_FILE};
if (! -f $filename) {
xCAT::MsgUtils->message('S', "logeventtoxcat: cannot find event detail file $filename in response $respname for condition $cond_name.\n");
exit (1);
}
# This parses the LastEvent
my ( # split the SD into the following fields:
$Occurred, # One if the condition has been triggered
$ErrNum, # Non-zero if there was in error in the event registration
$ErrMsg, # The string msg related to ErrNum
$EventFlags, # Bit mask giving some additional info about the event
$EventTime, # Time of event expressed in seconds since 1/1/1970
$EventTimeMicros, # Number of microseconds past EventTime
$ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered
$NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4),
# this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability),
# this will be the FMS.
$NumAttrs, # Number of attr values from the resource returned in this event
$NumAttrsInExpr, # How many of the above were attributes in the event expression
$IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added
# to LastEvent, this will be the element right after this one.
$AttrArray # This list of attribute names, types, and values
) = split(/,/, $event, 12);
open(FILE1, "<$filename");
my $line1=readline(FILE1);
my @aTemp=split(/=/, $line1);
my $num_events=$aTemp[1];
close(FILE1);
my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list
my $count;
my @active=();
my @inactive=();
my %new_value=();
for ($count = 1; $count <= $num_events; $count++) {
my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`;
my @content_array=split(/\n/, $content);
pop(@content_array); #get rid of last line
shift(@content_array); #get rid of firt line
my %content_hash=();
foreach(@content_array) {
/([^\=]+)\=(.*)/;
$content_hash{$1}=$2;
}
my $j = 0; # index into attrArray
for (my $i=0; $i<$NumAttrs; $i++) {
my $attrName = $attrArray[$j++];
my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays.
my $attrValue = $attrArray[$j++];
if ($attrName eq '"Name"') { $node = $attrValue; }
if ($attrName eq '"Status"') { $status = $attrValue; }
}
} else {
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
exit 1;
}
my $status_string;
if ($status == 1) { $status_string="alive"; }
else { $status_string="unreachable"; }
my $node;
my $status;
my $status_string;
$node=$content_hash{ERRM_RSRC_NAME};
$status=$content_hash{ERRM_VALUE};
my $result=`$::XCATROOT/bin/nodech $node nodelist.status=$status_string 2>&1`;
my $code=$?;
if ($code) {
`logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`;
exit $code;
if ($status == 1) { $new_value{$node}=1; }
else { $new_value{$node}=0; }
} #end for
foreach my $node (keys %new_value) {
if ($new_value{$node} == 1) { push(@active, $node);}
else { push(@inactive, $node);}
} #end foreach
if (@active > 0) {
my $node_string=join(',',@active);
my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=active 2>&1`;
my $code=$?;
if ($code) {
`logger xCAT "[mon]: Error saving node status ($node_string,active) to xCAT:$result"`;
}
}
if (@inactive > 0) {
my $node_string=join(',',@inactive);
my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=inactive 2>&1`;
my $code=$?;
if ($code) {
`logger xCAT "[mon]: Error saving node status ($node_string,inactive) to xCAT:$result"`;
}
}
}
}
exit 0