From a4330ee00ef3cbd591a653237d853e784162453b Mon Sep 17 00:00:00 2001 From: linggao Date: Fri, 6 Mar 2009 16:30:03 +0000 Subject: [PATCH] event batching support for node status monitoring using RMC git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2844 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-rmc/plugin/rmcmon.pm | 123 ++++++++---- .../IBM.Condition/NodeReachability_Batch.pm | 29 +++ .../UpdatexCATNodeStatus_Batch.pm | 20 ++ xCAT-rmc/scripts/logevent_simple | 39 ++++ xCAT-rmc/scripts/logeventtoxcat | 2 +- xCAT-rmc/scripts/updatexcatnodestatus | 179 +++++++++++++----- 6 files changed, 309 insertions(+), 83 deletions(-) create mode 100644 xCAT-rmc/resources/sn/IBM.Condition/NodeReachability_Batch.pm create mode 100644 xCAT-rmc/resources/sn/IBM.EventResponse/UpdatexCATNodeStatus_Batch.pm create mode 100755 xCAT-rmc/scripts/logevent_simple diff --git a/xCAT-rmc/plugin/rmcmon.pm b/xCAT-rmc/plugin/rmcmon.pm index a2efc3a46..3ef9f48f1 100644 --- a/xCAT-rmc/plugin/rmcmon.pm +++ b/xCAT-rmc/plugin/rmcmon.pm @@ -613,25 +613,52 @@ sub startNodeStatusMon { reportError($retmsg, $callback); } - - if (!$isSV) { - #start monitoring the status of mn's immediate children - my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`; - if (($?) && ($result !~ /2618-244|2618-008/)) { #started - $retcode=$?; - $retmsg="Error start node status monitoring: $result"; - reportError($retmsg, $callback); - } - - #start monitoring the status of mn's grandchildren via their service nodes - $result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`; - if (($?) && ($result !~ /2618-244|2618-008/)) { #started - $retcode=$?; - $retmsg="Error start node status monitoring: $result"; - reportError($retmsg, $callback); - } + #get rsct version + my $result=`/usr/sbin/rsct/install/bin/ctversion`; + my $rsct_ver="0.0.0"; + if (!$?) { + chomp($result); + my @tempa=split(/ /, $result); + if (@tempa>1) { + $rsct_ver=$tempa[1]; + } } + #get setting from the monsetting table + my $batch=0; + if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) { + my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); + + my $batch_string=$settings{'nodeusebatch'}; + if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;} + } + + if ($batch) { + my $result=`startcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`; + if (($?) && ($result !~ /2618-244|2618-008/)) { #started + $retcode=$?; + $retmsg="Error start node status monitoring: $result"; + reportError($retmsg, $callback); + } + } else { + if (!$isSV) { + #start monitoring the status of mn's immediate children + my $result=`startcondresp NodeReachability UpdatexCATNodeStatus 2>&1`; + if (($?) && ($result !~ /2618-244|2618-008/)) { #started + $retcode=$?; + $retmsg="Error start node status monitoring: $result"; + reportError($retmsg, $callback); + } + + #start monitoring the status of mn's grandchildren via their service nodes + $result=`startcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`; + if (($?) && ($result !~ /2618-244|2618-008/)) { #started + $retcode=$?; + $retmsg="Error start node status monitoring: $result"; + reportError($retmsg, $callback); + } + } + } return ($retcode, $retmsg); } @@ -740,24 +767,54 @@ sub stopNodeStatusMon { my $retmsg=""; my $isSV=xCAT::Utils->isServiceNode(); - if ($isSV) { return ($retcode, $retmsg); } my $localhostname=hostname(); - #stop monitoring the status of mn's immediate children - my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`; - if (($?) && ($result !~ /2618-264/)) { #stoped - $retcode=$?; - $retmsg="Error stop node status monitoring: $result"; - reportError($retmsg, $callback); + #get rsct version + my $result=`/usr/sbin/rsct/install/bin/ctversion`; + my $rsct_ver="0.0.0"; + if (!$?) { + chomp($result); + my @tempa=split(/ /, $result); + if (@tempa>1) { + $rsct_ver=$tempa[1]; + } + } + + #get setting from the monsetting table + my $batch=0; + if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") >= 0) { + my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); + + my $batch_string=$settings{'nodeusebatch'}; + if ($batch_string =~ /1|Yes|yes|YES|Y|y/ ) { $batch=1;} + } + if ($batch) { + my $result=`stopcondresp NodeReachability_Batch UpdatexCATNodeStatus_Batch 2>&1`; + if (($?) && ($result !~ /2618-264/)) { #stoped + $retcode=$?; + $retmsg="Error stop node status monitoring: $result"; + reportError($retmsg, $callback); + } + + } else { + if ($isSV) { return ($retcode, $retmsg); } + + #stop monitoring the status of mn's immediate children + my $result=`stopcondresp NodeReachability UpdatexCATNodeStatus 2>&1`; + if (($?) && ($result !~ /2618-264/)) { #stoped + $retcode=$?; + $retmsg="Error stop node status monitoring: $result"; + reportError($retmsg, $callback); + } + #stop monitoring the status of mn's grandchildren via their service nodes + $result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`; + if (($?) && ($result !~ /2618-264/)) { #stoped + $retcode=$?; + $retmsg="Error stop node status monitoring: $result"; + reportError($retmsg, $callback); + } } - #stop monitoring the status of mn's grandchildren via their service nodes - $result=`stopcondresp NodeReachability_H UpdatexCATNodeStatus 2>&1`; - if (($?) && ($result !~ /2618-264/)) { #stoped - $retcode=$?; - $retmsg="Error stop node status monitoring: $result"; - reportError($retmsg, $callback); - } return ($retcode, $retmsg); } @@ -1277,7 +1334,9 @@ sub getDescription { monitoring domain for RMC during node deployment time. Settings: rfanout -- indicating the fanout number for configuring or deconfiguring - remote nodes."; + remote nodes. + nodeusebatch -- use RMC event batching function when doing node status monitoring. +"; } #-------------------------------------------------------------------------------- diff --git a/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability_Batch.pm b/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability_Batch.pm new file mode 100644 index 000000000..ea1b00268 --- /dev/null +++ b/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability_Batch.pm @@ -0,0 +1,29 @@ +#!/usr/bin/perl + +BEGIN +{ + $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat'; +} +use lib "$::XCATROOT/lib/perl"; +use xCAT::Utils; + +if (exists($ENV{RSCT_VER})) { + my $rsct_ver=$ENV{RSCT_VER}; + if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") < 0) { + exit(0); + } +} + +$RES::Condition{'NodeReachability_Batch'} = { + Name => q(NodeReachability_Batch), + ResourceClass => q(IBM.MngNode), + EventExpression => q(Status!=Status@P), + EventDescription => q(An event will be generated when a status changes), + ManagementScope => q(1), + EventBatchingInterval => q(60), + EventBatchingMaxEvents => q(200), + Severity => q(2), +}; + + +1; diff --git a/xCAT-rmc/resources/sn/IBM.EventResponse/UpdatexCATNodeStatus_Batch.pm b/xCAT-rmc/resources/sn/IBM.EventResponse/UpdatexCATNodeStatus_Batch.pm new file mode 100644 index 000000000..819c52b50 --- /dev/null +++ b/xCAT-rmc/resources/sn/IBM.EventResponse/UpdatexCATNodeStatus_Batch.pm @@ -0,0 +1,20 @@ +#!/usr/bin/perl +BEGIN +{ + $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat'; +} +use lib "$::XCATROOT/lib/perl"; +use xCAT::Utils; + +if (exists($ENV{RSCT_VER})) { + my $rsct_ver=$ENV{RSCT_VER}; + if (xCAT::Utils->CheckVersion($rsct_ver, "2.3.5.0") < 0) { exit 0;} +} + +$RES::EventResponse{'UpdatexCATNodeStatus_Batch'} = { + Name => q(UpdatexCATNodeStatus_Batch), + Locked => q(0), + EventBatching => q(1), + Actions => q({[updatexCAT,{127},{0},{86400},/opt/xcat/sbin/rmcmon/updatexcatnodestatus,3,0,0,0,{},0]}), +}; +1; diff --git a/xCAT-rmc/scripts/logevent_simple b/xCAT-rmc/scripts/logevent_simple new file mode 100755 index 000000000..705219af6 --- /dev/null +++ b/xCAT-rmc/scripts/logevent_simple @@ -0,0 +1,39 @@ +#!/usr/bin/env perl +BEGIN +{ + $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat'; +} +use lib "$::XCATROOT/lib/perl"; + +open(FILE, ">>/var/log/logevent_simple.log") or dir ("cannot open the file\n"); +($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time); +printf FILE "\n-----------%2d-%02d-%04d %02d:%02d:%02d-----------\n", $mon+1,$mday,$year+1900,$hour,$min,$sec; + +my $respname=$ENV{ERRM_ER_NAME}; +my $cond_name=$ENV{ERRM_COND_NAME}; +my $batch=0; +if (exists($ENV{ERRM_COND_BATCH})) { $batch=$ENV{ERRM_COND_BATCH}; } +if ($batch) { + if ($ENV{ERRM_COND_BATCH_NUM} > 0) { + #check if event detail file exist + if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})){ + xCAT::MsgUtils->message('S', "logeventtoxcat: no event detail file specified in the response $respname for condition $cond_name.\n"); + exit (1); + } + + my $filename=$ENV{ERRM_EVENT_DETAIL_FILE}; + if (! -f $filename) { + xCAT::MsgUtils->message('S', "logeventtoxcat: cannot find event detail file $filename in response $respname for condition $cond_name.\n"); + exit (1); + } + open(FILE1, "<$filename"); + my $line1=readline(FILE1); + my @aTemp=split(/=/, $line1); + my $num_events=$aTemp[1]; + close(FILE1); + print FILE "Total events=$num_events\n\n"; + } +} +close(FILE); +return 0; + diff --git a/xCAT-rmc/scripts/logeventtoxcat b/xCAT-rmc/scripts/logeventtoxcat index 6a974afc7..bc14f9529 100755 --- a/xCAT-rmc/scripts/logeventtoxcat +++ b/xCAT-rmc/scripts/logeventtoxcat @@ -63,7 +63,7 @@ if (!$batch) { #handle single event my $count; for ($count = 1; $count <= $num_events; $count++) { - my $content=`sed -n "/Event $count/, /(null)/ p" $filename`; + my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`; my @content_array=split(/\n/, $content); pop(@content_array); #get rid of last line diff --git a/xCAT-rmc/scripts/updatexcatnodestatus b/xCAT-rmc/scripts/updatexcatnodestatus index 2dbea2079..9abcdd41c 100755 --- a/xCAT-rmc/scripts/updatexcatnodestatus +++ b/xCAT-rmc/scripts/updatexcatnodestatus @@ -13,62 +13,141 @@ use strict; use Getopt::Std; use POSIX qw(strftime); +my $respname=$ENV{ERRM_ER_NAME}; my $cond_name=$ENV{ERRM_COND_NAME}; -my $node; -my $status; +my $batch=0; +if (exists($ENV{ERRM_COND_BATCH})) { $batch=$ENV{ERRM_COND_BATCH}; } -if ($cond_name eq "NodeReachability") { - $node=$ENV{ERRM_RSRC_NAME}; - $status=$ENV{ERRM_VALUE}; -} elsif ($cond_name eq "NodeReachability_H") { - # Parse the ERRM_VALUE attribute, which will contain the - # LastEvent structured data variable from the Condition class - # The fields in this structured data variable are documented below where we parse them out. - my $event = $ENV{ERRM_VALUE}; - $event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them +if (!$batch) { + my $node; + my $status; + if ($cond_name eq "NodeReachability") { + $node=$ENV{ERRM_RSRC_NAME}; + $status=$ENV{ERRM_VALUE}; + } elsif ($cond_name eq "NodeReachability_H") { + # Parse the ERRM_VALUE attribute, which will contain the + # LastEvent structured data variable from the Condition class + # The fields in this structured data variable are documented below where we parse them out. + my $event = $ENV{ERRM_VALUE}; + $event =~ s/^\[(.*)\]$/$1/; # SD variables have square brackets around them + + # This parse the LastEvent + my ( # split the SD into the following fields: + $Occurred, # One if the condition has been triggered + $ErrNum, # Non-zero if there was in error in the event registration + $ErrMsg, # The string msg related to ErrNum + $EventFlags, # Bit mask giving some additional info about the event + $EventTime, # Time of event expressed in seconds since 1/1/1970 + $EventTimeMicros, # Number of microseconds past EventTime + $ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered + $NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4), + # this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability), + # this will be the FMS. + $NumAttrs, # Number of attr values from the resource returned in this event + $NumAttrsInExpr, # How many of the above were attributes in the event expression + $IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added + # to LastEvent, this will be the element right after this one. + $AttrArray # This list of attribute names, types, and values + ) = split(/,/, $event, 12); + + my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list + + my $j = 0; # index into attrArray + for (my $i=0; $i<$NumAttrs; $i++) { + my $attrName = $attrArray[$j++]; + my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays. + my $attrValue = $attrArray[$j++]; + if ($attrName eq '"Name"') { $node = $attrValue; } + if ($attrName eq '"Status"') { $status = $attrValue; } + } + } else { + `logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`; + exit 1; + } + my $status_string; + if ($status == 1) { $status_string="alive"; } + else { $status_string="unreachable"; } + + my $result=`$::XCATROOT/sbin/chtab node=$node nodelist.status=$status_string 2>&1`; + my $code=$?; + if ($code) { + `logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`; + exit $code; + } +} else { #batch event + if ($cond_name ne "NodeReachability_Batch") { + `logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`; + exit 1; + } + if ($ENV{ERRM_COND_BATCH_NUM} > 0) { + #check if event detail file exist + if (!exists($ENV{ERRM_EVENT_DETAIL_FILE})){ + xCAT::MsgUtils->message('S', "logeventtoxcat: no event detail file specified in the response $respname for condition $cond_name.\n"); + exit (1); + } + + my $filename=$ENV{ERRM_EVENT_DETAIL_FILE}; + if (! -f $filename) { + xCAT::MsgUtils->message('S', "logeventtoxcat: cannot find event detail file $filename in response $respname for condition $cond_name.\n"); + exit (1); + } - # This parse the LastEvent - my ( # split the SD into the following fields: - $Occurred, # One if the condition has been triggered - $ErrNum, # Non-zero if there was in error in the event registration - $ErrMsg, # The string msg related to ErrNum - $EventFlags, # Bit mask giving some additional info about the event - $EventTime, # Time of event expressed in seconds since 1/1/1970 - $EventTimeMicros, # Number of microseconds past EventTime - $ResourceHandle, # Binary address of the RMC resource that caused the condition to be triggered - $NodeName, # The node on which the event occurred. For conditions that use the management domain scope (4), - # this will be the leaf node. For conditions that use the local scope (e.g. NodeReachability), - # this will be the FMS. - $NumAttrs, # Number of attr values from the resource returned in this event - $NumAttrsInExpr, # How many of the above were attributes in the event expression - $IndexForAttrs, # The starting index of the array of values. Until new fixed fields are added - # to LastEvent, this will be the element right after this one. - $AttrArray # This list of attribute names, types, and values - ) = split(/,/, $event, 12); + open(FILE1, "<$filename"); + my $line1=readline(FILE1); + my @aTemp=split(/=/, $line1); + my $num_events=$aTemp[1]; + close(FILE1); - my @attrArray = split(/,/, $AttrArray); # Note: parsing this way does not support SDs or SD Arrays that may be in this list + my $count; + my @active=(); + my @inactive=(); + my %new_value=(); + for ($count = 1; $count <= $num_events; $count++) { + my $content=`sed -n "/Event $count\$/, /(null)/ p" $filename`; + + my @content_array=split(/\n/, $content); + pop(@content_array); #get rid of last line + shift(@content_array); #get rid of firt line + + my %content_hash=(); + foreach(@content_array) { + /([^\=]+)\=(.*)/; + $content_hash{$1}=$2; + } - my $j = 0; # index into attrArray - for (my $i=0; $i<$NumAttrs; $i++) { - my $attrName = $attrArray[$j++]; - my $attrType = $attrArray[$j++]; # Types <= 8 are "simple" types. Types > 8 are SDs and arrays. - my $attrValue = $attrArray[$j++]; - if ($attrName eq '"Name"') { $node = $attrValue; } - if ($attrName eq '"Status"') { $status = $attrValue; } - } -} else { - `logger xCAT "[mon]: updatexcatnodestatus: This script does not handle condition $cond_name"`; - exit 1; -} -my $status_string; -if ($status == 1) { $status_string="alive"; } -else { $status_string="unreachable"; } + my $node; + my $status; + my $status_string; + $node=$content_hash{ERRM_RSRC_NAME}; + $status=$content_hash{ERRM_VALUE}; -my $result=`$::XCATROOT/bin/nodech $node nodelist.status=$status_string 2>&1`; -my $code=$?; -if ($code) { - `logger xCAT "[mon]: Error saving node status ($node,$status_string) to xCAT:$result"`; - exit $code; + if ($status == 1) { $new_value{$node}=1; } + else { $new_value{$node}=0; } + } #end for + foreach my $node (keys %new_value) { + if ($new_value{$node} == 1) { push(@active, $node);} + else { push(@inactive, $node);} + } #end foreach + + + if (@active > 0) { + my $node_string=join(',',@active); + my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=active 2>&1`; + my $code=$?; + if ($code) { + `logger xCAT "[mon]: Error saving node status ($node_string,active) to xCAT:$result"`; + } + } + if (@inactive > 0) { + my $node_string=join(',',@inactive); + my $result=`XCATBYPASS=Y $::XCATROOT/bin/nodech $node_string nodelist.status=inactive 2>&1`; + + my $code=$?; + if ($code) { + `logger xCAT "[mon]: Error saving node status ($node_string,inactive) to xCAT:$result"`; + } + } + } } exit 0