From 2c703ff14baef2e55dc419a175cb64cfd7ea83f5 Mon Sep 17 00:00:00 2001 From: jbjohnso Date: Wed, 9 Apr 2008 20:22:46 +0000 Subject: [PATCH] -Modify nbroot to be able to function without xcatd= being in /proc/cmdline (still honors it) -Eliminate a lot of output (debug and expected error messages) -Properly failover to all dhcpservers for getdestiny, nextdestiny, and getipmi -Randomly vary some sleep intervals to self-segregate nodes in time to take it easy on xCATd -Enhance x86 architecture detection to differentiate x86_64 and x86 with an x86 kernel -Only down nics during discovery if the broadcast domains conflict git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@1012 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-nbroot/overlay/bin/dodestiny | 72 ++++++++++++------- xCAT-nbroot/overlay/bin/getdestiny | 10 ++- xCAT-nbroot/overlay/bin/getipmi | 10 ++- xCAT-nbroot/overlay/bin/minixcatd.awk | 3 +- xCAT-nbroot/overlay/bin/nextdestiny | 10 ++- xCAT-nbroot/overlay/bin/nextdestiny.awk | 3 +- xCAT-nbroot/overlay/bin/restart | 8 ++- xCAT-nbroot/overlay/etc/init.d/S11stunnel | 24 +++++-- .../overlay/usr/share/udhcpc/default.script | 4 +- 9 files changed, 100 insertions(+), 44 deletions(-) diff --git a/xCAT-nbroot/overlay/bin/dodestiny b/xCAT-nbroot/overlay/bin/dodestiny index 89b039cd2..15e1094a5 100755 --- a/xCAT-nbroot/overlay/bin/dodestiny +++ b/xCAT-nbroot/overlay/bin/dodestiny @@ -6,9 +6,11 @@ for parm in `cat /proc/cmdline`; do XCATDEST=`echo $parm|awk -F= '{print $2}'` fi done -export XCATMASTER=`echo $XCATDEST | awk -F: '{print $1}'` -export XCATPORT=`echo $XCATDEST | awk -F: '{print $2}'` - +export XCATPORT=3001 +if [ ! -z "$XCATDEST" ]; then + export XCATMASTER=`echo $XCATDEST | awk -F: '{print $1}'` + export XCATPORT=`echo $XCATDEST | awk -F: '{print $2}'` +fi while :; do DESTINY=`grep destiny /tmp/destiny | awk -F'>' '{print $2}'|awk -F'<' '{print $1}'` @@ -17,11 +19,13 @@ while :; do DESTINY=`echo $DESTINY|awk '{print $1}'` #No bash, no tricks if [ "$DESTINY" == "standby" ]; then echo "Server notified us of standby condition, please check chain table". - echo "Retrying destiny in 15 seconds" - usleep 15000000 # something may be transiently wrong, check back in 15 seconds + let STSLEEP=15+$RANDOM%15 + echo "Retrying destiny in $STSLEEP seconds" + sleep $STSLEEP # something may be transiently wrong, check back in 15 seconds while ! getdestiny; do echo "Retrying destiny retrieval" - usleep 300000 + let RTSLEEP=$RANDOM%5 + sleep $RTSLEEP done exec /bin/dodestiny fi @@ -32,12 +36,25 @@ while :; do if [ "$DESTINY" == "discover" ]; then echo "MAC discovery begins" minixcatd.awk & - usleep 500000 #Mitigate occurrances of 'failed to notify node' + sleep 1 #Mitigate occurrances of 'failed to notify node' while [ ! -r /restart ]; do + let myr=$RANDOM%10 + sleep $myr #Stagger discovery requests from many nodes ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1 "|" $5}' sleep 1 MTM="unknown" SERIAL="unknown" + ARCH="unknown" + if uname -m | grep i686 > /dev/null || uname -m | grep x86_64 > /dev/null; then + if grep ^flags /proc/cpuinfo |head -n 1|grep " lm " > /dev/null; then + ARCH=x86_64 + else + ARCH=x86 + fi + else + ARCH=`uname -m` + fi + if [ -x /bin/vpddecode ]; then MTM=`(/bin/vpddecode|grep Type || echo "unknown unknown: unknown")|awk '{print $3}'` SERIAL=`(/bin/vpddecode|grep "Box Serial" || echo "unknown unknown unknown: unknown")|awk '{print $4}'` @@ -52,7 +69,7 @@ while :; do ( echo "" echo "findme" - echo ""`uname -m`"" + echo "$ARCH" for i in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1 "|" $5}'`; do IFACE=`echo $i|awk -F'|' '{print $1}'` DRIVER=`ethtool -i $IFACE|grep ^driver|awk '{print $2}'` @@ -72,30 +89,37 @@ while :; do echo "$SERIAL" fi echo "" ) > /tmp/discout - ping -c 1 $XCATMASTER - cat /tmp/discout | udpcat.awk $XCATMASTER $XCATPORT & #can't figure out how to make a hung gawk behave.. - if usleep 8000000 #Give the preferred method 5 seconds to complete before resorting - then - #if usleep succeeded, that means it wasn't killed and therefore, no answer yet - killall udpcat.awk #reap hung ones - for nic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'`; do #also, bring down interfaces to make sure that we send from the 'right' nic - for dnic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'|grep -v $nic`; do - ifconfig $dnic down - done + if [ ! -z "$XCATMASTER" ]; then ping -c 1 $XCATMASTER cat /tmp/discout | udpcat.awk $XCATMASTER $XCATPORT & #can't figure out how to make a hung gawk behave.. + fi + if [ -z "$XCATMASTER" ] || sleep 8 > /dev/null 2>&1 #Give the preferred method 8 seconds to complete before resorting + then + #if sleep succeeded, that means it wasn't killed and therefore, no answer yet + killall udpcat.awk > /dev/null 2>&1 #reap hung ones + for nic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'`; do #also, bring down interfaces to make sure that we send from the 'right' nic + MYB=`ifconfig $nic|grep "Bcast"|awk '{print $3}'|awk -F: '{print $2}'` + for dnic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'|grep -v $nic`; do + OTB=`ifconfig $dnic|grep "Bcast"|awk '{print $3}'|awk -F: '{print $2}'` + if [ "$OTB" == "$MYB" ]; then # if broadcasts match, down the other nic + ifconfig $dnic down + fi + done + if [ ! -z "$XCATMASTER" ]; then + (ping -c 1 $XCATMASTER + cat /tmp/discout | udpcat.awk $XCATMASTER $XCATPORT ) & #can't figure out how to make a hung gawk behave.. + fi for dhcps in `cat /tmp/dhcpserver`; do - ping -c 1 $dhcps - cat /tmp/discout | udpcat.awk $dhcps $XCATPORT & + ( ping -c 1 $dhcps + cat /tmp/discout | udpcat.awk $dhcps $XCATPORT )& done for dnic in `ifconfig -a|grep HWaddr|grep -v sit|awk '{print $1}'|grep -v $nic`; do ifconfig $dnic up done + if ! sleep 5 > /dev/null 2>&1; then break; fi # give management server a chance to get to minixcatd.awk done - - usleep 8000000 fi - killall udpcat.awk #reap hung ones + killall udpcat.awk > /dev/null 2>&1 #reap hung ones done #Discovery complete, restart requested. exec /bin/restart @@ -151,7 +175,7 @@ while :; do ./runme.sh cd - fi - usleep 5000000 # something may be transiently wrong, check back in 5 seconds + sleep 5 # something may be transiently wrong, check back in 5 seconds getdestiny if grep error /tmp/destiny; then echo ERROR: see above diff --git a/xCAT-nbroot/overlay/bin/getdestiny b/xCAT-nbroot/overlay/bin/getdestiny index 2424160e3..11dea4b40 100644 --- a/xCAT-nbroot/overlay/bin/getdestiny +++ b/xCAT-nbroot/overlay/bin/getdestiny @@ -1,4 +1,10 @@ #!/bin/sh -if ! getdestiny.awk 301; then - getdestiny.awk 300; +if ! getdestiny.awk 301 > /dev/null 2>&1; then + let i=400 + for srv in `cat /tmp/dhcpserver`; do + if getdestiny.awk $i > /dev/null 2>&1; then + exit + fi + let i=i+1 + done fi diff --git a/xCAT-nbroot/overlay/bin/getipmi b/xCAT-nbroot/overlay/bin/getipmi index f553a23bd..deb1bf53e 100644 --- a/xCAT-nbroot/overlay/bin/getipmi +++ b/xCAT-nbroot/overlay/bin/getipmi @@ -1,4 +1,10 @@ #!/bin/sh -if ! getipmi.awk 301; then - getipmi.awk 300; +if ! getipmi.awk 301 > /dev/null 2>&1; then + let i=400 + for srv in `cat /tmp/dhcpserver`; do + if getipmi.awk $i > /dev/null 2>&1 ; then + exit + fi + let i=i+1 + done fi diff --git a/xCAT-nbroot/overlay/bin/minixcatd.awk b/xCAT-nbroot/overlay/bin/minixcatd.awk index 51089adc3..4e0bc9ba4 100755 --- a/xCAT-nbroot/overlay/bin/minixcatd.awk +++ b/xCAT-nbroot/overlay/bin/minixcatd.awk @@ -5,13 +5,12 @@ BEGIN { listener = "/inet/tcp/" port "/0/0" quit = "no" while (match(quit,"no")) { - print quit; while (match(quit,"no") && (listener |& getline) > 0) { if (match($0,"restart")) { print "restarting bootstrap process" |& listener quit="yes" system("touch /restart") - system("killall usleep") + system("killall sleep") close(listener) } } diff --git a/xCAT-nbroot/overlay/bin/nextdestiny b/xCAT-nbroot/overlay/bin/nextdestiny index 399b7b4c1..9f3e27a1e 100755 --- a/xCAT-nbroot/overlay/bin/nextdestiny +++ b/xCAT-nbroot/overlay/bin/nextdestiny @@ -1,4 +1,10 @@ #!/bin/sh -if ! nextdestiny.awk 301; then - nextdestiny.awk 300; +if ! nextdestiny.awk 301 > /dev/null 2>&1 ; then + let i=400 + for srv in `cat /tmp/dhcpserver`; do + if nextdestiny.awk $i > /dev/null 2>&1 ; then + exit + fi + let i=i+1 + done fi diff --git a/xCAT-nbroot/overlay/bin/nextdestiny.awk b/xCAT-nbroot/overlay/bin/nextdestiny.awk index 992748eb9..0f18eac4d 100755 --- a/xCAT-nbroot/overlay/bin/nextdestiny.awk +++ b/xCAT-nbroot/overlay/bin/nextdestiny.awk @@ -1,7 +1,8 @@ #!/usr/bin/awk -f # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html BEGIN { - ns = "/inet/tcp/0/127.0.0.1/301" + localport = ARGV[1] + ns = "/inet/tcp/0/127.0.0.1/" localport print "" |& ns print "nextdestiny" |& ns diff --git a/xCAT-nbroot/overlay/bin/restart b/xCAT-nbroot/overlay/bin/restart index 6cc6bb621..55a498968 100755 --- a/xCAT-nbroot/overlay/bin/restart +++ b/xCAT-nbroot/overlay/bin/restart @@ -1,7 +1,13 @@ # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html #Redhcp, do the xcat part again rm /restart +echo -n > /tmp/dhcpserver killall -12 udhcpc;killall -10 udhcpc -sleep 5 +echo -n "Wating 10 seconds for DHCP changes to take effect " +for i in 1 2 3 4 5 6 7 8 9 10; do + sleep 1 + echo -n . +done +echo "Done waiting" /etc/init.d/S11stunnel #redo stunnel config exec /etc/init.d/S99xcat.sh diff --git a/xCAT-nbroot/overlay/etc/init.d/S11stunnel b/xCAT-nbroot/overlay/etc/init.d/S11stunnel index d49a8c2f5..9103c651a 100755 --- a/xCAT-nbroot/overlay/etc/init.d/S11stunnel +++ b/xCAT-nbroot/overlay/etc/init.d/S11stunnel @@ -2,7 +2,8 @@ # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html #Stunnel init for xcat: XCATDEST="" -killall stunnel +XCATPORT=3001 +killall stunnel > /dev/null 2>&1 for parm in `cat /proc/cmdline`; do key=`echo $parm|awk -F= '{print $1}'` if [ "$key" == "xcatd" ]; then @@ -13,14 +14,23 @@ done mkdir -p /etc/stunnel echo 'client=yes' > /etc/stunnel/stunnel.conf echo 'foreground=yes' >> /etc/stunnel/stunnel.conf +echo 'output=/dev/null' >> /etc/stunnel/stunnel.conf +#echo 'socket=a:SO_BINDTODEVICE=lo' >> /etc/stunnel/stunnel.conf echo 'verify=0' >> /etc/stunnel/stunnel.conf -echo '[xcatds]' >> /etc/stunnel/stunnel.conf -echo 'accept=301' >> /etc/stunnel/stunnel.conf -echo 'connect='$XCATDEST >> /etc/stunnel/stunnel.conf +if [ ! -z "$XCATDEST" ]; then + echo '[xcatds]' >> /etc/stunnel/stunnel.conf + echo 'accept=301' >> /etc/stunnel/stunnel.conf + echo 'connect='$XCATDEST >> /etc/stunnel/stunnel.conf +fi if [ -r /tmp/dhcpserver ]; then - echo '[dhcps]' >> /etc/stunnel/stunnel.conf - echo 'accept=300' >> /etc/stunnel/stunnel.conf - echo 'connect='`cat /tmp/dhcpserver`':'$XCATPORT >> /etc/stunnel/stunnel.conf + let i=400; + for srv in `cat /tmp/dhcpserver`; do + echo "[xcatd$i]" >> /etc/stunnel/stunnel.conf + echo "accept=$i" >> /etc/stunnel/stunnel.conf + echo "connect="$srv":"$XCATPORT >> /etc/stunnel/stunnel.conf + let i=i+1 + done fi mkdir -p /usr/var/run/stunnel stunnel & +sleep 2 diff --git a/xCAT-nbroot/overlay/usr/share/udhcpc/default.script b/xCAT-nbroot/overlay/usr/share/udhcpc/default.script index a5cf8a866..892342782 100755 --- a/xCAT-nbroot/overlay/usr/share/udhcpc/default.script +++ b/xCAT-nbroot/overlay/usr/share/udhcpc/default.script @@ -8,8 +8,7 @@ bound|renew) echo $siaddr >> /tmp/dhcpserver /sbin/ifconfig $interface $ip netmask $subnet if [ -n "$router" ] ; then - echo "deleting routers" - while route del default gw 0.0.0.0 dev $interface ; do + while route del default gw 0.0.0.0 dev $interface > /dev/null 2>&1; do : done @@ -20,7 +19,6 @@ bound|renew) echo -n > /etc/resolv.conf [ -n "$domain" ] && echo search $domain >> /etc/resolv.conf for i in $dns ; do - echo adding dns $i echo nameserver $i >> /etc/resolv.conf done ;;