event batching support for node status monitoring using RMC
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2844 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
parent
c7adfdeb74
commit
a4330ee00e
@ -613,25 +613,52 @@ sub startNodeStatusMon {
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
|
||||
|
||||
if (!$isSV) {
|
||||
#start monitoring the status of mn's immediate children
|
||||
my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
|
||||
$retcode=$?;
|
||||
$retmsg="Error start node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
|
||||
#start monitoring the status of mn's grandchildren via their service nodes
|
||||
$result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
|
||||
$retcode=$?;
|
||||
$retmsg="Error start node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
#get rsct version
|
||||
my $result=`/usr/sbin/rsct/install/bin/ctversion`;
|
||||
my $rsct_ver="0.0.0";
|
||||
if (!$?) {
|
||||
chomp($result);
|
||||
my @tempa=split(/ /, $result);
|
||||
if (@tempa>1) {
|
||||
$rsct_ver=$tempa[1];
|
||||
}
|
||||
}
|
||||
|
||||
#get setting from the monsetting table
|
||||
my $batch=0;
|
||||
if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) {
|
||||
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
|
||||
|
||||
my $batch_string=$settings{'nodeusebatch'};
|
||||
if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;}
|
||||
}
|
||||
|
||||
if ($batch) {
|
||||
my $result=`startcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`;
|
||||
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
|
||||
$retcode=$?;
|
||||
$retmsg="Error start node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
} else {
|
||||
if (!$isSV) {
|
||||
#start monitoring the status of mn's immediate children
|
||||
my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
|
||||
$retcode=$?;
|
||||
$retmsg="Error start node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
|
||||
#start monitoring the status of mn's grandchildren via their service nodes
|
||||
$result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-244|2618-008/)) { #started
|
||||
$retcode=$?;
|
||||
$retmsg="Error start node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ($retcode, $retmsg);
|
||||
}
|
||||
|
||||
@ -740,24 +767,54 @@ sub stopNodeStatusMon {
|
||||
my $retmsg="";
|
||||
|
||||
my $isSV=xCAT::Utils->isServiceNode();
|
||||
if ($isSV) { return ($retcode, $retmsg); }
|
||||
my $localhostname=hostname();
|
||||
|
||||
#stop monitoring the status of mn's immediate children
|
||||
my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-264/)) { #stoped
|
||||
$retcode=$?;
|
||||
$retmsg="Error stop node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
#get rsct version
|
||||
my $result=`/usr/sbin/rsct/install/bin/ctversion`;
|
||||
my $rsct_ver="0.0.0";
|
||||
if (!$?) {
|
||||
chomp($result);
|
||||
my @tempa=split(/ /, $result);
|
||||
if (@tempa>1) {
|
||||
$rsct_ver=$tempa[1];
|
||||
}
|
||||
}
|
||||
|
||||
#get setting from the monsetting table
|
||||
my $batch=0;
|
||||
if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) {
|
||||
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
|
||||
|
||||
my $batch_string=$settings{'nodeusebatch'};
|
||||
if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;}
|
||||
}
|
||||
if ($batch) {
|
||||
my $result=`stopcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`;
|
||||
if (($?) && ($result !~ /2618-264/)) { #stoped
|
||||
$retcode=$?;
|
||||
$retmsg="Error stop node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
|
||||
} else {
|
||||
if ($isSV) { return ($retcode, $retmsg); }
|
||||
|
||||
#stop monitoring the status of mn's immediate children
|
||||
my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-264/)) { #stoped
|
||||
$retcode=$?;
|
||||
$retmsg="Error stop node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
#stop monitoring the status of mn's grandchildren via their service nodes
|
||||
$result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-264/)) { #stoped
|
||||
$retcode=$?;
|
||||
$retmsg="Error stop node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
}
|
||||
|
||||
#stop monitoring the status of mn's grandchildren via their service nodes
|
||||
$result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`;
|
||||
if (($?) && ($result !~ /2618-264/)) { #stoped
|
||||
$retcode=$?;
|
||||
$retmsg="Error stop node status monitoring: $result";
|
||||
reportError($retmsg, $callback);
|
||||
}
|
||||
return ($retcode, $retmsg);
|
||||
}
|
||||
|
||||
@ -1277,7 +1334,9 @@ sub getDescription {
|
||||
monitoring domain for RMC during node deployment time.
|
||||
Settings:
|
||||
rfanout -- indicating the fanout number for configuring or deconfiguring
|
||||
remote nodes.";
|
||||
remote nodes.
|
||||
nodeusebatch -- use RMC event batching function when doing node status monitoring.
|
||||
";
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------------
|
||||
|
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/perl
# RMC condition definition: NodeReachability_Batch.
#
# Stores one entry in %RES::Condition describing a condition on the
# IBM.MngNode resource class that generates an event when Status changes,
# with event batching parameters set.
# NOTE(review): the consumer of %RES::Condition is not visible in this file;
# presumably the xCAT tooling that creates RMC resources -- confirm.

BEGIN {
    # Fall back to the default install root when XCATROOT is unset or empty.
    $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use xCAT::Utils;

# When the caller supplies RSCT_VER and it is older than 2.3.5.0, leave
# without defining the condition (exit status 0).
exit(0)
    if exists($ENV{RSCT_VER})
    && xCAT::Utils->CheckVersion($ENV{RSCT_VER}, "2.3.5.0") < 0;

# Attribute set for the condition; every value is kept as a string.
my %batch_condition = (
    Name                   => 'NodeReachability_Batch',
    ResourceClass          => 'IBM.MngNode',
    EventExpression        => 'Status!=Status@P',
    EventDescription       => 'An event will be generated when a status changes',
    ManagementScope        => '1',
    EventBatchingInterval  => '60',     # presumably seconds -- TODO confirm against RSCT docs
    EventBatchingMaxEvents => '200',
    Severity               => '2',
);

$RES::Condition{'NodeReachability_Batch'} = \%batch_condition;

1;
|
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/perl
# RMC event-response definition: UpdatexCATNodeStatus_Batch.
#
# Stores one entry in %RES::EventResponse with event batching switched on
# and a single action that runs /opt/xcat/sbin/rmcmon/updatexcatnodestatus.
# NOTE(review): the meaning of the positional fields inside Actions is not
# visible here; verify against the RSCT event-response documentation before
# editing that string.

BEGIN {
    # Fall back to the default install root when XCATROOT is unset or empty.
    $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use xCAT::Utils;

# An RSCT level below 2.3.5.0 (as reported through RSCT_VER) means this
# definition is skipped: exit with status 0 before anything is defined.
if (exists $ENV{RSCT_VER}) {
    my $too_old = xCAT::Utils->CheckVersion($ENV{RSCT_VER}, "2.3.5.0") < 0;
    exit 0 if $too_old;
}

my $response_name = 'UpdatexCATNodeStatus_Batch';

$RES::EventResponse{$response_name} = {
    Name          => $response_name,
    Locked        => '0',
    EventBatching => '1',
    Actions       => '{[updatexCAT,{127},{0},{86400},/opt/xcat/sbin/rmcmon/updatexcatnodestatus,3,0,0,0,{},0]}',
};

1;
|
39
xCAT-rmc/scripts/logevent_simple
Executable file
39
xCAT-rmc/scripts/logevent_simple
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env perl
# logevent_simple -- minimal RMC event-response script.
#
# Each invocation appends a timestamped separator line to
# /var/log/logevent_simple.log.  When the ERRM environment marks the event
# as batched (ERRM_COND_BATCH true and ERRM_COND_BATCH_NUM > 0), the first
# line of the event detail file named by ERRM_EVENT_DETAIL_FILE is read and
# the value after its '=' is logged as the total number of events.
#
# Exit status: 0 on success, 1 when the event detail file is missing or
# unreadable; dies if the log file itself cannot be opened or closed.
use strict;
use warnings;

BEGIN
{
    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";

my $LOG_FILE = '/var/log/logevent_simple.log';

# Hand $msg to xCAT::MsgUtils->message with code 'S', then exit 1.
# The module is loaded on demand so that the normal (non-error) path keeps
# working even when only the log file is needed.
sub report_and_exit {
    my ($msg) = @_;
    require xCAT::MsgUtils;
    xCAT::MsgUtils->message('S', $msg);
    exit(1);
}

open(my $log, '>>', $LOG_FILE) or die "cannot open $LOG_FILE: $!\n";

my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
printf {$log} "\n-----------%2d-%02d-%04d %02d:%02d:%02d-----------\n",
    $mon + 1, $mday, $year + 1900, $hour, $min, $sec;

# Names are only used in diagnostics; default to '' so an absent variable
# does not trigger an "uninitialized" warning.
my $respname  = defined $ENV{ERRM_ER_NAME}   ? $ENV{ERRM_ER_NAME}   : '';
my $cond_name = defined $ENV{ERRM_COND_NAME} ? $ENV{ERRM_COND_NAME} : '';

my $batch     = exists($ENV{ERRM_COND_BATCH})      ? $ENV{ERRM_COND_BATCH}     : 0;
my $batch_num = defined $ENV{ERRM_COND_BATCH_NUM}  ? $ENV{ERRM_COND_BATCH_NUM} : 0;

if ($batch && $batch_num > 0) {
    # A batched event must come with an event detail file.
    if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})) {
        report_and_exit("logevent_simple: no event detail file specified in the response $respname for condition $cond_name.\n");
    }

    my $filename = $ENV{ERRM_EVENT_DETAIL_FILE};
    if (! -f $filename) {
        report_and_exit("logevent_simple: cannot find event detail file $filename in response $respname for condition $cond_name.\n");
    }

    # Three-argument open: the file name comes from the environment and must
    # never be interpreted as an open mode.
    open(my $detail, '<', $filename)
        or report_and_exit("logevent_simple: cannot open event detail file $filename in response $respname for condition $cond_name: $!\n");
    my $first_line = readline($detail);
    close($detail);

    # The first line is expected to look like "<label>=<count>"; keep only
    # the count, without its trailing newline.
    $first_line = '' unless defined $first_line;
    chomp $first_line;
    my (undef, $num_events) = split(/=/, $first_line);
    $num_events = '' unless defined $num_events;

    print {$log} "Total events=$num_events\n\n";
}

# Buffered write errors surface at close time, so check it.
close($log) or die "cannot close $LOG_FILE: $!\n";

# A script executed directly cannot `return` at file scope; exit instead.
exit 0;
|
||||
|
@ -63,7 +63,7 @@ if (!$batch) { #handle single event
|
||||
|
||||
my $count;
|
||||
for ($count = 1; $count <= $num_events; $count++) {
|
||||
my $content=`sed -n "/Event $count/, /(null)/ p" $filename`;
|
||||
my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`;
|
||||
|
||||
my @content_array=split(/\n/, $content);
|
||||
pop(@content_array); #get rid of last line
|
||||
|
@ -13,62 +13,141 @@ use strict;
|
||||
use Getopt::Std;
|
||||
use POSIX qw(strftime);
|
||||
|
||||
my $respname=$ENV{ERRM_ER_NAME};
|
||||
my $cond_name=$ENV{ERRM_COND_NAME};
|
||||
my $node;
|
||||
my $status;
|
||||
my $batch=0;
|
||||
if (exists($ENV{ERRM_COND_BATCH})) { $batch=$ENV{ERRM_COND_BATCH}; }
|
||||
|
||||
if ($cond_name eq "NodeReachability") {
|
||||
$node=$ENV{ERRM_RSRC_NAME};
|
||||
$status=$ENV{ERRM_VALUE};
|
||||
} elsif ($cond_name eq "NodeReachability_H") {
|
||||
# Parse the ERRM_VALUE attribute, which will contain the
|
||||
# LastEvent structured data variable from the Condition class
|
||||
# The fields in this structured data variable are documented below where we parse them out.
|
||||
my $event = $ENV{ERRM_VALUE};
|
||||
$event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them
|
||||
if (!$batch) {
|
||||
my $node;
|
||||
my $status;
|
||||
if ($cond_name eq "NodeReachability") {
|
||||
$node=$ENV{ERRM_RSRC_NAME};
|
||||
$status=$ENV{ERRM_VALUE};
|
||||
} elsif ($cond_name eq "NodeReachability_H") {
|
||||
# Parse the ERRM_VALUE attribute, which will contain the
|
||||
# LastEvent structured data variable from the Condition class
|
||||
# The fields in this structured data variable are documented below where we parse them out.
|
||||
my $event = $ENV{ERRM_VALUE};
|
||||
$event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them
|
||||
|
||||
# This parse the LastEvent
|
||||
my ( # split the SD into the following fields:
|
||||
$Occurred, # One if the condition has been triggered
|
||||
$ErrNum, # Non-zero if there was in error in the event registration
|
||||
$ErrMsg, # The string msg related to ErrNum
|
||||
$EventFlags, # Bit mask giving some additional info about the event
|
||||
$EventTime, # Time of event expressed in seconds since 1/1/1970
|
||||
$EventTimeMicros, # Number of microseconds past EventTime
|
||||
$ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered
|
||||
$NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4),
|
||||
# this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability),
|
||||
# this will be the FMS.
|
||||
$NumAttrs, # Number of attr values from the resource returned in this event
|
||||
$NumAttrsInExpr, # How many of the above were attributes in the event expression
|
||||
$IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added
|
||||
# to LastEvent, this will be the element right after this one.
|
||||
$AttrArray # This list of attribute names, types, and values
|
||||
) = split(/,/, $event, 12);
|
||||
|
||||
my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list
|
||||
|
||||
my $j = 0; # index into attrArray
|
||||
for (my $i=0; $i<$NumAttrs; $i++) {
|
||||
my $attrName = $attrArray[$j++];
|
||||
my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays.
|
||||
my $attrValue = $attrArray[$j++];
|
||||
if ($attrName eq '"Name"') { $node = $attrValue; }
|
||||
if ($attrName eq '"Status"') { $status = $attrValue; }
|
||||
}
|
||||
} else {
|
||||
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
|
||||
exit 1;
|
||||
}
|
||||
my $status_string;
|
||||
if ($status == 1) { $status_string="alive"; }
|
||||
else { $status_string="unreachable"; }
|
||||
|
||||
my $result=`$::XCATROOT/sbin/chtab node=$node nodelist.status=$status_string 2>&1`;
|
||||
my $code=$?;
|
||||
if ($code) {
|
||||
`logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`;
|
||||
exit $code;
|
||||
}
|
||||
} else { #batch event
|
||||
if ($cond_name ne "NodeReachability_Batch") {
|
||||
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
|
||||
exit 1;
|
||||
}
|
||||
if ($ENV{ERRM_COND_BATCH_NUM} > 0) {
|
||||
#check if event detail file exist
|
||||
if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})){
|
||||
xCAT::MsgUtils->message('S', "logeventtoxcat: no event detail file specified in the response $respname for condition $cond_name.\n");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
my $filename=$ENV{ERRM_EVENT_DETAIL_FILE};
|
||||
if (! -f $filename) {
|
||||
xCAT::MsgUtils->message('S', "logeventtoxcat: cannot find event detail file $filename in response $respname for condition $cond_name.\n");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
# This parse the LastEvent
|
||||
my ( # split the SD into the following fields:
|
||||
$Occurred, # One if the condition has been triggered
|
||||
$ErrNum, # Non-zero if there was in error in the event registration
|
||||
$ErrMsg, # The string msg related to ErrNum
|
||||
$EventFlags, # Bit mask giving some additional info about the event
|
||||
$EventTime, # Time of event expressed in seconds since 1/1/1970
|
||||
$EventTimeMicros, # Number of microseconds past EventTime
|
||||
$ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered
|
||||
$NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4),
|
||||
# this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability),
|
||||
# this will be the FMS.
|
||||
$NumAttrs, # Number of attr values from the resource returned in this event
|
||||
$NumAttrsInExpr, # How many of the above were attributes in the event expression
|
||||
$IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added
|
||||
# to LastEvent, this will be the element right after this one.
|
||||
$AttrArray # This list of attribute names, types, and values
|
||||
) = split(/,/, $event, 12);
|
||||
open(FILE1, "<$filename");
|
||||
my $line1=readline(FILE1);
|
||||
my @aTemp=split(/=/, $line1);
|
||||
my $num_events=$aTemp[1];
|
||||
close(FILE1);
|
||||
|
||||
my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list
|
||||
my $count;
|
||||
my @active=();
|
||||
my @inactive=();
|
||||
my %new_value=();
|
||||
for ($count = 1; $count <= $num_events; $count++) {
|
||||
my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`;
|
||||
|
||||
my @content_array=split(/\n/, $content);
|
||||
pop(@content_array); #get rid of last line
|
||||
shift(@content_array); #get rid of firt line
|
||||
|
||||
my %content_hash=();
|
||||
foreach(@content_array) {
|
||||
/([^\=]+)\=(.*)/;
|
||||
$content_hash{$1}=$2;
|
||||
}
|
||||
|
||||
my $j = 0; # index into attrArray
|
||||
for (my $i=0; $i<$NumAttrs; $i++) {
|
||||
my $attrName = $attrArray[$j++];
|
||||
my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays.
|
||||
my $attrValue = $attrArray[$j++];
|
||||
if ($attrName eq '"Name"') { $node = $attrValue; }
|
||||
if ($attrName eq '"Status"') { $status = $attrValue; }
|
||||
}
|
||||
} else {
|
||||
`logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`;
|
||||
exit 1;
|
||||
}
|
||||
my $status_string;
|
||||
if ($status == 1) { $status_string="alive"; }
|
||||
else { $status_string="unreachable"; }
|
||||
my $node;
|
||||
my $status;
|
||||
my $status_string;
|
||||
$node=$content_hash{ERRM_RSRC_NAME};
|
||||
$status=$content_hash{ERRM_VALUE};
|
||||
|
||||
my $result=`$::XCATROOT/bin/nodech $node nodelist.status=$status_string 2>&1`;
|
||||
my $code=$?;
|
||||
if ($code) {
|
||||
`logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`;
|
||||
exit $code;
|
||||
if ($status == 1) { $new_value{$node}=1; }
|
||||
else { $new_value{$node}=0; }
|
||||
} #end for
|
||||
foreach my $node (keys %new_value) {
|
||||
if ($new_value{$node} == 1) { push(@active, $node);}
|
||||
else { push(@inactive, $node);}
|
||||
} #end foreach
|
||||
|
||||
|
||||
if (@active > 0) {
|
||||
my $node_string=join(',',@active);
|
||||
my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=active 2>&1`;
|
||||
my $code=$?;
|
||||
if ($code) {
|
||||
`logger xCAT "[mon]: Error saving node status ($node_string,active) to xCAT:$result"`;
|
||||
}
|
||||
}
|
||||
if (@inactive > 0) {
|
||||
my $node_string=join(',',@inactive);
|
||||
my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=inactive 2>&1`;
|
||||
|
||||
my $code=$?;
|
||||
if ($code) {
|
||||
`logger xCAT "[mon]: Error saving node status ($node_string,inactive) to xCAT:$result"`;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
exit 0
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user