xcat-core/xCAT-nbroot/overlay/bin/dodestiny
jbjohnso 2c703ff14b -Modify nbroot to be able to function without xcatd= being in /proc/cmdline (still honors it)
-Eliminate a lot of output  (debug and expected error messages)
-Properly failover to all dhcpservers for getdestiny, nextdestiny, and getipmi
-Randomly vary some sleep intervals to self-segregate nodes in time to take it easy on xCATd
-Enhance x86 architecture detection to differentiate x86_64 and x86 with an x86 kernel
-Only down nics during discovery if the broadcast domains conflict


git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@1012 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
2008-04-09 20:22:46 +00:00

184 lines
7.3 KiB
Bash
Executable File

#!/bin/sh
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
# Look for an optional xcatd=<host>[:<port>] token on the kernel command line.
# Results:
#   XCATDEST   - raw value of the xcatd= token (left unset when absent)
#   XCATMASTER - host part of XCATDEST (exported only when xcatd= was given)
#   XCATPORT   - port part of XCATDEST, or the default 3001 when no port
#                (or no xcatd= token at all) was supplied
for parm in `cat /proc/cmdline`; do
    key=`echo "$parm"|awk -F= '{print $1}'`
    if [ "$key" == "xcatd" ]; then
        XCATDEST=`echo "$parm"|awk -F= '{print $2}'`
    fi
done
export XCATPORT=3001
if [ ! -z "$XCATDEST" ]; then
    export XCATMASTER=`echo "$XCATDEST" | awk -F: '{print $1}'`
    # Only override the default when a port was really given; "xcatd=host"
    # (no ":port") must not blank out XCATPORT.
    XCATDPORT=`echo "$XCATDEST" | awk -F: '{print $2}'`
    if [ ! -z "$XCATDPORT" ]; then
        export XCATPORT=$XCATDPORT
    fi
fi
# Main loop: read the current "destiny" out of /tmp/destiny and act on it.
#
# Every pass takes the line(s) of /tmp/destiny containing "destiny", keeps the
# text after the first '>' and before the following '<' (presumably the body of
# a <destiny>...</destiny> element - confirm against getdestiny), and derives:
#   DEST    - text before the first '='      (e.g. "runcmd", "runimage")
#   TARG    - text between the first and second '=' (the argument)
#   DESTINY - first whitespace-delimited word of the full value
# Handled values: standby, shell, discover, reboot/boot, runcmd=<cmd>,
# install/netboot, runimage=<url>.  When no branch leaves the script, the loop
# sleeps 5 seconds, calls getdestiny and starts over.
#
# External helpers used but not defined here: getdestiny, nextdestiny,
# minixcatd.awk, udpcat.awk, /bin/restart.  Reads XCATMASTER / XCATPORT set by
# the code above the loop, and /tmp/dhcpserver.
while :; do
    DESTINY=`grep destiny /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'`
    DEST=`echo $DESTINY|awk -F= '{print $1}'` #No bash, no tricks
    TARG=`echo $DESTINY|awk -F= '{print $2}'` #No bash, no tricks
    DESTINY=`echo $DESTINY|awk '{print $1}'` #No bash, no tricks

    # --- standby: wait, re-fetch the destiny, then start this script afresh ---
    if [ "$DESTINY" == "standby" ]; then
        echo "Server notified us of standby condition, please check chain table".
        let STSLEEP=15+$RANDOM%15
        echo "Retrying destiny in $STSLEEP seconds"
        sleep $STSLEEP # something may be transiently wrong, check back in 15-29 seconds
        while ! getdestiny; do
            echo "Retrying destiny retrieval"
            let RTSLEEP=$RANDOM%5
            sleep $RTSLEEP
        done
        exec /bin/dodestiny
    fi

    # --- shell: stop asking for destinies and keep respawning a shell ---
    if [ "$DESTINY" == "shell" ]; then
        echo "Server notified us to stay in shell state, stopping destiny requests"
        while :; do /bin/sh; done #exit
    fi

    # --- discover: announce this node (findme) until /restart becomes readable ---
    if [ "$DESTINY" == "discover" ]; then
        echo "MAC discovery begins"
        minixcatd.awk &
        sleep 1 #Mitigate occurrences of 'failed to notify node'
        while [ ! -r /restart ]; do
            let myr=$RANDOM%10
            sleep $myr #Stagger discovery requests from many nodes
            # Show "<iface>|<mac>" for every interface on the console.
            ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1 "|" $5}'
            sleep 1
            MTM="unknown"
            SERIAL="unknown"
            ARCH="unknown"
            # A 32-bit kernel on a 64-bit capable CPU still reports i686, so
            # decide x86 vs x86_64 from the "lm" (long mode) CPU flag instead.
            if uname -m | grep i686 > /dev/null || uname -m | grep x86_64 > /dev/null; then
                if grep ^flags /proc/cpuinfo |head -n 1|grep " lm " > /dev/null; then
                    ARCH=x86_64
                else
                    ARCH=x86
                fi
            else
                ARCH=`uname -m`
            fi
            # Machine type/model and serial: try vpddecode first, then dmidecode;
            # a device-tree model, when present, overrides MTM.
            if [ -x /bin/vpddecode ]; then
                MTM=`(/bin/vpddecode|grep Type || echo "unknown unknown: unknown")|awk '{print $3}'`
                SERIAL=`(/bin/vpddecode|grep "Box Serial" || echo "unknown unknown unknown: unknown")|awk '{print $4}'`
            fi
            if [ "$MTM" == "unknown" -a -x /bin/dmidecode ]; then #This gets a bit hackish... iDataplex
                MTM=`dmidecode |grep -A4 "^System Information"|grep "Product Name"|awk -F'[' '{print $2}'|awk -F']' '{print $1}'`
                SERIAL=`dmidecode |grep -A4 "^System Information"|grep "Serial Number"|awk -F: '{print $2}'`
            fi
            if [ -r /proc/device-tree/model ]; then
                MTM=`cat /proc/device-tree/model |awk -F, '{print $2}'`
            fi
            # Build the findme request in /tmp/discout: one <mac> element per
            # NIC (driver|iface|mac|address), one for the BMC when ipmi_si
            # loads, plus <mtm>/<serial> when they were determined.
            (
            echo "<xcatrequest>"
            echo "<command>findme</command>"
            echo "<arch>$ARCH</arch>"
            for i in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1 "|" $5}'`; do
                IFACE=`echo $i|awk -F'|' '{print $1}'`
                DRIVER=`ethtool -i $IFACE|grep ^driver|awk '{print $2}'`
                ADDRESS=`ip address show dev $IFACE|grep 'inet '|awk '{print $2}'`
                echo "<mac>$DRIVER|$i|$ADDRESS</mac>"
            done
            modprobe ipmi_devintf
            if modprobe ipmi_si; then
                echo "<mac>bmc|bmc|"`ipmitool lan print 1|grep ^MAC|awk '{print $4}'`"</mac>"
            fi
            rmmod ipmi_si
            rmmod ipmi_devintf
            if [ "$MTM" != "unknown" ]; then
                echo "<mtm>$MTM</mtm>"
            fi
            if [ "$SERIAL" != "unknown" ]; then
                echo "<serial>$SERIAL</serial>"
            fi
            echo "</xcatrequest>" ) > /tmp/discout
            # Preferred path: send straight to the xcatd= master, if one is known.
            if [ ! -z "$XCATMASTER" ]; then
                ping -c 1 $XCATMASTER
                cat /tmp/discout | udpcat.awk $XCATMASTER $XCATPORT & #can't figure out how to make a hung gawk behave..
            fi
            if [ -z "$XCATMASTER" ] || sleep 8 > /dev/null 2>&1 #Give the preferred method 8 seconds to complete before resorting
            then
                #if sleep succeeded, that means it wasn't killed and therefore, no answer yet
                killall udpcat.awk > /dev/null 2>&1 #reap hung ones
                for nic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'`; do #also, bring down interfaces to make sure that we send from the 'right' nic
                    MYB=`ifconfig $nic|grep "Bcast"|awk '{print $3}'|awk -F: '{print $2}'`
                    # Peers are selected by exact interface name: a substring
                    # filter on $nic would also skip e.g. eth10 when nic is eth1.
                    for dnic in `ifconfig -a|grep HWaddr|grep -v sit|awk -v self="$nic" '$1 != self {print $1}'`; do
                        OTB=`ifconfig $dnic|grep "Bcast"|awk '{print $3}'|awk -F: '{print $2}'`
                        if [ "$OTB" == "$MYB" ]; then # if broadcasts match, down the other nic
                            ifconfig $dnic down
                        fi
                    done
                    if [ ! -z "$XCATMASTER" ]; then
                        (ping -c 1 $XCATMASTER
                        cat /tmp/discout | udpcat.awk $XCATMASTER $XCATPORT ) & #can't figure out how to make a hung gawk behave..
                    fi
                    # Fall back to every server listed in /tmp/dhcpserver.
                    for dhcps in `cat /tmp/dhcpserver`; do
                        ( ping -c 1 $dhcps
                        cat /tmp/discout | udpcat.awk $dhcps $XCATPORT )&
                    done
                    # Bring every other interface back up (same exact-name rule).
                    for dnic in `ifconfig -a|grep HWaddr|grep -v sit|awk -v self="$nic" '$1 != self {print $1}'`; do
                        ifconfig $dnic up
                    done
                    if ! sleep 5 > /dev/null 2>&1; then break; fi # give management server a chance to get to minixcatd.awk
                done
            fi
            killall udpcat.awk > /dev/null 2>&1 #reap hung ones
        done
        #Discovery complete, restart requested.
        exec /bin/restart
    fi

    # --- reboot / boot: advance the chain, then force a reboot ---
    # NOTE(review): the retry loop has no delay of its own; presumably
    # nextdestiny paces itself - confirm.
    if [ "$DESTINY" == "reboot" -o "$DESTINY" == "boot" ]; then
        while ! nextdestiny ; do
            echo "Retrying next destiny..."
        done
        reboot -f
    fi

    # --- runcmd=<command>: advance the chain, then run the command ---
    # $TARG is deliberately unquoted so it splits into command and arguments.
    # NOTE(review): TARG is cut at a second '=', so commands containing '='
    # arrive truncated.
    if [ "$DEST" == "runcmd" ]; then
        while ! nextdestiny ; do
            echo "Retrying next destiny..."
        done
        $TARG
    fi

    # --- install / netboot ---
    # The leading "reboot -f" hands control to the boot loader; the kexec call
    # further down is commented out.  Everything after the first reboot only
    # runs if that reboot returns - presumably it never does; confirm.
    if [ "$DESTINY" == "install" -o "$DESTINY" == "netboot" ]; then
        reboot -f #If script is here, kexec failed, reboot in case it wasn't a linux kernel and let the boot loader handle it instead
        IMGSERVER=`grep imgserver /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'`
        INITRD=`grep initrd /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'`
        KERNEL=`grep kernel /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'`
        KCMD=`grep kcmdline /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'`
        # Retry every 10 seconds while wget exits 1, unless its output mentions
        # 416 (presumably HTTP 416: a resumed download that is already complete).
        ERROR=`wget http://$IMGSERVER/tftpboot/$KERNEL -O /tmp/kernel 2>&1`
        while [ $? == 1 ] && echo $ERROR|grep -v 416; do
            sleep 10
            ERROR=`wget -c http://$IMGSERVER/tftpboot/$KERNEL -O /tmp/kernel 2>&1`
        done
        ERROR=`wget -c http://$IMGSERVER/tftpboot/$INITRD -O /tmp/initrd 2>&1`
        while [ $? == 1 ] && echo $ERROR|grep -v 416; do
            sleep 10
            ERROR=`wget -c http://$IMGSERVER/tftpboot/$INITRD -O /tmp/initrd 2>&1`
        done
        #START getting ready for kexec
        for mod in `lsmod|awk '{print $1}'|grep -v Module`; do
            rmmod $mod
        done
        #kexec -f --append="$KCMD" --initrd=/tmp/initrd /tmp/kernel
        reboot -f #If script is here, kexec failed, reboot in case it wasn't a linux kernel and let the boot loader handle it instead
    fi

    # --- runimage=<url>: fetch a tarball, advance the chain, unpack, run runme.sh ---
    if [ "$DEST" == "runimage" ]; then
        # -p: do not complain when the directory is left over from an earlier run.
        mkdir -p /tmp/`basename $TARG`
        cd /tmp/`basename $TARG`
        # Capture stderr here as well, like every other wget in this script;
        # without it ERROR is always empty on the first attempt.
        ERROR=`wget $TARG 2>&1`
        while [ $? == 1 ] && echo $ERROR|grep -v 416; do
            sleep 10
            ERROR=`wget -c $TARG 2>&1`
        done
        while ! nextdestiny ; do
            echo "Retrying next destiny..."
        done
        tar zxvf `basename $TARG`
        cd /tmp/`basename $TARG`
        ./runme.sh
        cd -
    fi

    # Nothing above left the script: wait, fetch the destiny again and report
    # any line of /tmp/destiny that mentions "error".
    sleep 5 # something may be transiently wrong, check back in 5 seconds
    getdestiny
    if grep error /tmp/destiny; then
        echo ERROR: see above
    fi
done