Added support for installing predefined sensors on nodes for RMC monitoring

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@3564 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2009-06-12 19:20:00 +00:00
parent 4c1df7b8a2
commit be4d2366b6
2 changed files with 141 additions and 122 deletions

View File

@ -1,7 +1,15 @@
#!/usr/bin/perl
my $cmd;
if (-r "/var/xcat/rmcmon/scripts/monerrorlog") {
$cmd="/var/xcat/rmcmon/scripts/monerrorlog";
} else {
$cmd="/opt/xcat/sbin/rmcmon/monerrorlog";
}
$RES::Sensor{'ErrorLogSensor'} = {
Name => q(ErrorLogSensor),
Command => "/etc/xcat/rmcmon/scripts/monerrorlog",
Command => "$cmd",
UserName => q(root),
RefreshInterval => q(60),
ControlFlags => q(4),

View File

@ -65,137 +65,147 @@ then
exit 1
fi
#check if the resource is defined
result=`/usr/bin/lsrsrc-api -s IBM.MCP::"NodeID=0x$MS_NODEID"::MNName 2>&1`
if [ $? -gt 0 ]; then
# echo $result
p="2612-023" #resource not found
result1=`awk -v a="$result" -v b="$p" 'BEGIN{print index(a,b)}'`
if [ $result1 -gt 0 ]; then
if [ $ADD -eq -1 ]; then
exit 0; #resource already removed, do nothing
fi
else
echo "$result"
logger xCAT "$result"
exit 1
fi
else
if [ $ADD -eq 1 ]; then
exit 0; #resource already defined, do nothing
fi
fi
#ask RMC to take the new nodeid
if [[ $NODEID != "" ]]; then
oldid=`/usr/sbin/rsct/bin/lsnodeid`
oldidU=`echo "$oldid" | awk '{ print toupper($1) }'`
newidU=`echo "$NODEID" | awk '{ print toupper($1) }'`
if [ $oldidU != $newidU ]; then
if [ -e /usr/sbin/rsct/install/bin/recfgctnid ]; then
result=`/usr/sbin/rsct/install/bin/recfgctnid $NODEID 2>&1`
if [ $? -ne 0 ]; then
echo "Reset nodeid: $result"
logger xCAT "Reset nodeid: $result"
fi
else
#stop all rmc daemons
/usr/sbin/rsct/bin/rmcctrl -z
#get new nodeid into id files
if [ -e /var/ct/cfg/ct_node_id ]; then
sed s/^[^\#].*$/$NODEID/ /var/ct/cfg/ct_node_id > /tmp/id.tmp
else
if [ -e /etc/ct_node_id ]; then
sed s/^[^\#].*$/$NODEID/ /etc/ct_node_id > /tmp/id.tmp
if [ $ADD -eq 1 ]; then
#check if the resource is defined
result=`/usr/bin/lsrsrc-api -s IBM.MCP::"NodeID=0x$MS_NODEID"::MNName 2>&1`
if [ $? -gt 0 ]; then #lookup failed: resource not defined yet, so set it up below
#ask RMC to take the new nodeid
if [[ $NODEID != "" ]]; then
oldid=`/usr/sbin/rsct/bin/lsnodeid`
oldidU=`echo "$oldid" | awk '{ print toupper($1) }'`
newidU=`echo "$NODEID" | awk '{ print toupper($1) }'`
if [ $oldidU != $newidU ]; then
if [ -e /usr/sbin/rsct/install/bin/recfgctnid ]; then
result=`/usr/sbin/rsct/install/bin/recfgctnid $NODEID 2>&1`
if [ $? -ne 0 ]; then
echo "Reset nodeid: $result"
logger xCAT "Reset nodeid: $result"
fi
else
echo $NODEID > /tmp/id.tmp
#stop all rmc daemons
/usr/sbin/rsct/bin/rmcctrl -z
#get new nodeid into id files
if [ -e /var/ct/cfg/ct_node_id ]; then
sed s/^[^\#].*$/$NODEID/ /var/ct/cfg/ct_node_id > /tmp/id.tmp
else
if [ -e /etc/ct_node_id ]; then
sed s/^[^\#].*$/$NODEID/ /etc/ct_node_id > /tmp/id.tmp
else
echo $NODEID > /tmp/id.tmp
fi
fi
cp /tmp/id.tmp /var/ct/cfg/ct_node_id
cp /tmp/id.tmp /etc/ct_node_id
rm /tmp/id.tmp
#reconfig RMC
result=`/usr/sbin/rsct/install/bin/recfgct -s 2>&1`
if [ $? -ne 0 ]; then
echo "RMC setup on node $NODE: Cannot reconfig RSCT with new node id"
logger xCAT "RMC setup on node $NODE: Cannot reconfig RSCT with new node id"
fi
fi
fi
cp /tmp/id.tmp /var/ct/cfg/ct_node_id
cp /tmp/id.tmp /etc/ct_node_id
rm /tmp/id.tmp
#reconfig RMC
result=`/usr/sbin/rsct/install/bin/recfgct -s 2>&1`
if [ $? -ne 0 ]; then
echo "RMC setup on node $NODE: Cannot reconfig RSCT with new node id"
logger xCAT "RMC setup on node $NODE: Cannot reconfig RSCT with new node id"
fi
PID=`/bin/ps -ef | /bin/grep rmcd | /bin/grep -v grep | /bin/awk '{print $2}'`
if [ -z "$PID" ]; then
#restart rmc daemon
result=`/usr/sbin/rsct/bin/rmcctrl -s 2>&1`;
if [ $? -gt 0 ]; then
echo "RMC deamon cannot be started on node $NODE:$result"
logger xCAT "RMC deamon cannot be started on node $NODE:$result"
exit 1;
fi
fi
fi
fi
#wait for the RMC daemon to become fully functional
RETRY_LIMIT=20 # number of 2 second retry intervals (40 second total)
let i=$RETRY_LIMIT
while :
do
lsrsrc > /dev/null 2>&1
rc=$?
if [[ $rc -eq 0 ]]
then
break
fi
i=$((i - 1))
if (( i > 0 ))
then
sleep 2
else
break
fi
done
if [[ $rc -ne 0 ]]
then
logger xCAT "RMC subsystem has not started"
exit 1
fi
#enable remote client connection
/usr/sbin/rsct/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP
#get IP address of MS
ms_ip=$MONMASTER
result=`ping -c1 $MONMASTER 2>&1`
if [ $? -eq 0 ]; then
index1=`expr index "$result" "\("`
index2=`expr index "$result" "\)"`
#ms_ip=${result:$index1+1:$index2-$index1-2}
pos=`expr $index1 + 1`
length=`expr $index2 - $index1`
length=`expr $length - 1`
ms_ip=`expr substr "$result" $pos $length`
else
echo "RMC setup on node $NODE:$result"
logger xCAT "RMC setup on node $NODE:$result"
fi
PID=`/bin/ps -ef | /bin/grep rmcd | /bin/grep -v grep | /bin/awk '{print $2}'`
if [ -z "$PID" ]; then
#restart rmc daemon
result=`/usr/sbin/rsct/bin/rmcctrl -s 2>&1`;
if [ $? -gt 0 ]; then
echo "RMC deamon cannot be started on node $NODE:$result"
logger xCAT "RMC deamon cannot be started on node $NODE:$result"
exit 1;
fi
fi
#wait for the RMC daemon to become fully functional
RETRY_LIMIT=20 # number of 2 second retry intervals (40 second total)
let i=$RETRY_LIMIT
while :
do
lsrsrc > /dev/null 2>&1
rc=$?
if [[ $rc -eq 0 ]]
then
break
#define resource in IBM.MCP class on node
result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MONMASTER"::IPAddresses::"{\"$ms_ip\"}"::NodeID::0x$MS_NODEID 2>&1`
if [ $? -gt 0 ]; then
echo "Define resource in IBM.MCP class on node $NODE. result=$result1"
logger xCAT "Define resource in IBM.MCP class on node $NODE. result=$result1"
exit 1
fi
fi
i=$((i - 1))
if (( i > 0 ))
then
sleep 2
else
break
#create predefined sensors
if [[ -z "$NFSSERVER" ]]; then
NFSSERVER=$MASTER
fi
done
if [[ $rc -ne 0 ]]
then
logger xCAT "RMC subsystem has not started"
exit 1
fi
if [ $ADD -eq 1 ]; then
#enable remote client connection
/usr/sbin/rsct/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP
#get IP address of MS
ms_ip=$MONMASTER
result=`ping -c1 $MONMASTER 2>&1`
if [ $? -eq 0 ]; then
index1=`expr index "$result" "\("`
index2=`expr index "$result" "\)"`
#ms_ip=${result:$index1+1:$index2-$index1-2}
pos=`expr $index1 + 1`
length=`expr $index2 - $index1`
length=`expr $length - 1`
ms_ip=`expr substr "$result" $pos $length`
else
echo "RMC setup on node $NODE:$result"
logger xCAT "RMC setup on node $NODE:$result"
fi
#define resource in IBM.MCP class on node
result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MONMASTER"::IPAddresses::"{\"$ms_ip\"}"::NodeID::0x$MS_NODEID 2>&1`
if [ $? -gt 0 ]; then
echo "Define resource in IBM.MCP class on node $NODE. result=$result1"
logger xCAT "Define resource in IBM.MCP class on node $NODE. result=$result1"
exit 1
fi
#TODO: create predefined sensors
mkdir -p /var/xcat/rmcmon
rm -f -R /var/xcat/rmcmon
mkdir -p /tmp/postage/rmcmon
rm -f -R /tmp/postage/rmcmon/*
cd /tmp/postage/rmcmon
wget -l inf -N -r --waitretry=10 --random-wait --retry-connrefused -t 0 -T 60 ftp://$NFSSERVER/post/xcat/rmcmon 2> /tmp/wget.log
mv $NFSSERVER/post/xcat/rmcmon /var/xcat/rmcmon;
rm -rf $NFSSERVER
chmod 755 /var/xcat/rmcmon/scripts/*
/var/xcat/rmcmon/scripts/mkrmcresources /var/xcat/rmcmon/resources
else
#check if the resource is defined
result=`/usr/bin/lsrsrc-api -s IBM.MCP::"NodeID=0x$MS_NODEID"::MNName 2>&1`
if [ $? -gt 0 ]; then
# echo $result
p="2612-023" #resource not found
result1=`awk -v a="$result" -v b="$p" 'BEGIN{print index(a,b)}'`
if [ $result1 -gt 0 ]; then
if [ $ADD -eq -1 ]; then
exit 0; #resource already removed, do nothing
fi
else
echo "$result"
logger xCAT "$result"
exit 1
fi
fi
#remove resource in IBM.MCP class on the node
result2=`/usr/bin/rmrsrc-api -s IBM.MCP::"NodeID=0x$MS_NODEID" 2>&1`
if [ $? -gt 0 ]; then
@ -204,6 +214,7 @@ else
exit 1
fi
fi
echo "done"
exit 0;