2
0
mirror of https://github.com/xcat2/xcat-core.git synced 2025-06-17 20:00:19 +00:00
Files
xcat-core/xCAT-genesis-scripts/bin/doxcat
Victor Hu c1673f730b Since the genesis kernel is not intended to be long running, there's no need to start ntp daemon, simply attempt to sync with ntpdate.
There's an issue in this code where we query ntpq for the offset too quickly after starting the ntp daemon.  This causes the offset to always be 0 and we continue.  We have seen situations where ntpd starts before the first check and becomes > 1000.  Then it takes 15 minutes before the 2nd attempt and delays the hardware discovery process
2016-12-13 11:15:27 -05:00

434 lines
16 KiB
Plaintext
Executable File

#
# 2013.02.07 Brian Elliott Finley <bfinley@us.ibm.com>
# - Added slash in front of "var" in the NICSTOBRINGUP dhclient section.
# Bug reported by Jeff Lang <jrlang@uwyo.edu>. Thanks, Jeff!
#
log_label="xcat.genesis.doxcat"

# Start rsyslogd and log into a local file specified in /etc/rsyslog.conf.
# Later, once the xCAT MN is known, rsyslog.conf is changed so that log
# entries are sent to the xCAT MN.
logger -s -t "$log_label" -p local4.info "Starting syslog..."
ls /var/run/

# Major version only (the "8" of "rsyslogd 8.x.y ...").  awk splits on runs of
# whitespace, so the parse survives extra spaces between the name and the
# version, and `exit` guarantees a single value even if several lines match.
RSYSLOGD_VERSION=$(rsyslogd -v | awk '/rsyslogd/ { split($2, a, "."); print a[1]; exit }')

# If syslog is running and there's a pid file, kill it before restarting syslogd.
if [ -f /var/run/syslogd.pid ]; then
  kill -TERM "$(cat /var/run/syslogd.pid)"
fi

# Newer versions of rsyslogd (8 and higher) do not support the -c flag anymore.
# An undetectable version is treated as 0, i.e. the legacy invocation is used.
if [ "${RSYSLOGD_VERSION:-0}" -ge 8 ]; then
  /sbin/rsyslogd
else
  /sbin/rsyslogd -c4
fi
logger -s -t "$log_label" -p local4.info "Beginning doxcat process..."
# Load the CPU frequency scaling modules and, where the kernel exposes a
# scaling_governor file per CPU, select the "ondemand" governor on each.
modprobe acpi_cpufreq 2>/dev/null # on some machines this fails
modprobe cpufreq_ondemand
if ls /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >/dev/null 2>&1; then
  for governor_file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
    printf '%s' ondemand > "$governor_file"
  done
fi
# Resolve the boot NIC ($bootnic) from $BOOTIF, when one was supplied.
# BOOTIF is expected as "01-aa-bb-cc-dd-ee-ff": a "01-" prefix followed by the
# MAC with dashes (presumably the form PXELINUX appends — verify for other
# boot loaders).
if [ -n "$BOOTIF" ]; then
  # Strip only a *leading* "01-" so that a MAC given without the prefix does
  # not lose an interior "01" octet, then turn the dashes into colons to
  # match the `ip link` output format.
  BOOTIF=$(echo "$BOOTIF" | sed -e 's/^01-//' -e 's/-/:/g')
  logger -s -t "$log_label" -p local4.info "Waiting for device with address $BOOTIF to appear.."
  # 6000 polls at 0.1s each: roughly ten minutes before giving up.
  gripeiter=6000
  while [ -z "$bootnic" ]; do
    # The interface name is on the line preceding the link/ether line; MAC
    # comparison is case-insensitive since hex digits may arrive in either case.
    bootnic=$(ip link show | grep -B1 -i "$BOOTIF" | grep mtu | awk '{print $2}' | sed -e 's/:$//')
    sleep 0.1
    if [ "$gripeiter" = 0 ]; then
      logger -s -t "$log_label" -p local4.err "Unable to find boot device (Maybe the xCAT genesis kernel is missing the driver for your NIC?)"
      # Park forever so the error stays visible on the console.
      while :; do sleep 365d; done
    fi
    gripeiter=$((gripeiter-1))
  done
fi
echo "Done"
if [ -z "$bootnic" ]; then
  logger -s -t "$log_label" -p local4.err "BOOTIF missing, can't detect boot nic"
fi
# Seed the DHCPv6 lease file with a default-duid statement derived from the
# SMBIOS product UUID, when sysfs exposes one.
if [ -r /sys/devices/virtual/dmi/id/product_uuid ]; then
# Fixed prefix of the DUID; presumably the two-byte DUID type 4 (DUID-UUID) — TODO confirm.
duid='default-duid "\\000\\004';
#product_uuid in sysfs fails to cope with endianness of SMBIOS 2.6, unconditionally swap. Technically leads to incorrect DUID in 'older' systems but matches MS behavior
# The sed script byte-swaps the first three UUID fields, drops the dashes and
# emits the result as space-separated two-digit hex bytes, one per iteration.
for i in `sed -e 's/\(..\)\(..\)\(..\)\(..\)-\(..\)\(..\)-\(..\)\(..\)/\4\3\2\1-\6\5-\8\7/;s/-//g;s/\(..\)/\1 /g' /sys/devices/virtual/dmi/id/product_uuid`; do
# Render the hex byte as three octal digits, escaped the same way as the prefix above.
octnum="\\"`printf "\\%03o" 0x$i`
duid=$duid$octnum
done
# Close the quoted string and terminate the statement.
duid=$duid'";'
# Overwrites any existing lease file; the dhclient -6 invocations later in this script read it via -lf.
echo $duid > /var/lib/dhclient/dhclient6.leases
fi
# Start rpcbind and rpc.statd.
rpcbind
rpc.statd

# Create the SSH host keys.  Current ssh-keygen versions generate every key
# type with -A; when that option is rejected (older ssh-keygen), fall back to
# generating the needed keys one at a time.
if ! ssh-keygen -A 2> /dev/null; then
  for keytype in rsa dsa; do
    ssh-keygen -q -t "$keytype" -f "/etc/ssh/ssh_host_${keytype}_key" -C '' -N ''
  done
fi

# Restrict sshd to protocol 2, then launch it.
printf '%s\n' 'Protocol 2' >> /etc/ssh/sshd_config
/usr/sbin/sshd
# Write a minimal openssl.cnf and generate this node's RSA key pair.  The
# public half is exported as PUBKEY: base64 body only, with the PEM
# header/footer lines and all whitespace removed.
mkdir -p /etc/xcat /etc/pki/tls
printf '%s\n' \
  '[ req ]' \
  'distinguished_name = nodedn' \
  '[ nodedn ]' > /etc/pki/tls/openssl.cnf

logger -s -t "$log_label" -p local4.info "Generating private key..."
openssl genrsa -out /etc/xcat/privkey.pem 1024 > /dev/null 2>&1
logger -s -t "$log_label" -p local4.info "Done"

PUBKEY=$(openssl rsa -in /etc/xcat/privkey.pem -pubout 2> /dev/null | grep -v "PUBLIC KEY")
# Join the base64 lines into a single token.
PUBKEY=${PUBKEY//[$' \t\n']/}
export PUBKEY
# Append an lldpad configuration that, for every interface whose name does not
# contain "lo", enables transmission of TLV id 00000006 carrying $PUBKEY, then
# start lldpad as a daemon.
logger -s -t "$log_label" -p local4.info "Creating /var/lib/lldpad file..."
mkdir -p /var/lib/lldpad
{
  echo 'lldp :'
  echo '{'
  for iface in $(ip link | grep -v '^ ' | awk '{print $2}' | sed -e 's/:$//' | grep -v lo); do
    # One stanza per interface.
    printf '%s\n' \
      "$iface :" \
      '{' \
      'tlvid00000006 :' \
      '{' \
      "info = \"$PUBKEY\";" \
      'enableTx = true;' \
      '};' \
      'adminStatus = 3;' \
      '};'
  done
  echo '};'
} >> /var/lib/lldpad/lldpad.conf
lldpad -d
logger -s -t "$log_label" -p local4.info "lldpad started."
# Calculate the broadcast address of a given IP address and mask.
# Arguments: $1 - IPv4 address in dotted-quad form (e.g. 192.168.1.10)
#            $2 - netmask in dotted-quad form (e.g. 255.255.255.0)
# Outputs:   the broadcast address in dotted-quad form on stdout
bcastcalc() {
  # Keep every helper variable local so a direct (non-subshell) call does not
  # leak state into the rest of the script.
  local ip1 ip2 ip3 ip4 mask1 mask2 mask3 mask4
  IFS=. read -r ip1 ip2 ip3 ip4 <<< "$1"
  IFS=. read -r mask1 mask2 mask3 mask4 <<< "$2"
  # Broadcast = address with every host bit (the inverse of the mask) set.
  echo "$((ip1 | (255 - mask1))).$((ip2 | (255 - mask2))).$((ip3 | (255 - mask3))).$((ip4 | (255 - mask4)))"
}
# Calculates the number of bits in a netmask, converting something like
# 255.255.255.192 to 26 so you can use the notation 10.0.0.1/26.
# Globals:   log_label (read) - syslog tag used for the error message
# Arguments: $1 - netmask in dotted-quad form
# Outputs:   the prefix length on stdout
# Exits:     with status 1 (after logging) when an octet is not a valid
#            netmask octet; callers are expected to invoke this inside $( ).
mask2prefix() {
  # A local IFS is restored automatically on return, and nbits/octet no longer
  # leak into the caller's scope.
  local IFS=. octet nbits=0
  for octet in $1; do
    case $octet in
      255) nbits=$((nbits + 8)) ;;
      254) nbits=$((nbits + 7)) ;;
      252) nbits=$((nbits + 6)) ;;
      248) nbits=$((nbits + 5)) ;;
      240) nbits=$((nbits + 4)) ;;
      224) nbits=$((nbits + 3)) ;;
      192) nbits=$((nbits + 2)) ;;
      128) nbits=$((nbits + 1)) ;;
      0) ;;
      *)
        # log_label must be quoted: IFS is "." here, so an unquoted expansion
        # would be split into several words and logger would get a wrong tag.
        logger -s -t "$log_label" -p local4.err "$octet is not recognised"
        exit 1
        ;;
    esac
  done
  echo "$nbits"
}
# See if static IP information was specified on the kernel command line
# (hostip/ipaddr, netmask, gateway); otherwise DHCP is used later on.  Also
# picks up the xCAT daemon address from xcatd=<host>[:<port>].
XCATPORT=3001
# read -a splits on whitespace without glob-expanding the words.
read -r -a cmdline_parms < /proc/cmdline
for parm in "${cmdline_parms[@]}"; do
  key=${parm%%=*}
  # Value is the second '='-separated field (empty when there is no '=').
  case $parm in
    *=*) value=${parm#*=}; value=${value%%=*} ;;
    *) value= ;;
  esac
  # Keys are matched case-insensitively.
  case ${key,,} in
    hostip|ipaddr) hostip=$value ;;
    netmask) netmask=$value ;;
    gateway) gateway=$value ;;
    xcatd)
      XCATMASTER=${value%%:*}
      # Only override the default port when one was actually given, so that
      # "xcatd=<host>" keeps XCATPORT at 3001 instead of blanking it.
      case $value in
        *:*) xcatd_port=${value#*:}; xcatd_port=${xcatd_port%%:*} ;;
        *) xcatd_port= ;;
      esac
      if [ -n "$xcatd_port" ]; then
        XCATPORT=$xcatd_port
      fi
      ;;
  esac
done
export XCATPORT
export XCATMASTER
logger -s -t "$log_label" -p local4.info "XCATMASTER is $XCATMASTER, XCATPORT is $XCATPORT"

# Replace the last line of rsyslog.conf with a "*.* @$XCATMASTER" rule.
head -n -1 /etc/rsyslog.conf > /etc/rsyslog.conf.new
cp /etc/rsyslog.conf.new /etc/rsyslog.conf
echo "*.* @$XCATMASTER" >> /etc/rsyslog.conf
# Bring up networking.  A static configuration is used only when hostip,
# netmask, gateway and bootnic are all known; otherwise addresses come from DHCP.
if [[ -n $hostip && -n $netmask && -n $gateway && -n $bootnic ]]; then
# doing static ip
# the device was determined above from the bootif mac, and put in bootnic
numbits=$(mask2prefix $netmask)
broadcast=$(bcastcalc $hostip $netmask)
logger -s -t $log_label -p local4.info "Setting static IP=$hostip/$numbits broadcast=$broadcast gateway=$gateway device=$bootnic BOOTIF=$BOOTIF ..."
ip addr add $hostip/$numbits broadcast $broadcast dev $bootnic scope global label $bootnic
ip link set $bootnic up
ip route replace to default via $gateway dev $bootnic
# in softlayer it takes up to 60 seconds for the nic to actually be able to communicate
logger -s -t $log_label -p local4.info "Waiting to reach xCAT mgmt node $gateway."
xcatretries=60
# Ping the gateway until it answers or $xcatretries attempts were made; a dot is printed per failed attempt.
# xcati is not initialised in this block; shell arithmetic treats an unset variable as 0.
while [ $((xcati+=1)) -le $xcatretries ] && ! ping -c2 -w3 $gateway >/dev/null 2>&1; do echo -n .; done
if [ $xcati -le $xcatretries ]; then echo " success"; else echo " failed"; fi
sleep 3
else
logger -s -t $log_label -p local4.info "Setting IP via DHCP..."
# This section is for System P hardware discovery, which won't have a BOOTIF value set
if [ -z "$bootnic" ]; then
# Phase 1: wait (up to 100 tries, 2 seconds apart) until at least one NIC reports state UP,
# starting IPv4 and IPv6 dhclient in the background on every such NIC.
tries=0
while [ $tries -lt 100 ]; do
# Names of all interfaces in state UP whose name does not contain "lo", in reverse numeric sort order.
ALLUP_NICS=`ip link show | grep -v "^ " | grep "state UP" | awk '{print $2}' | sed -e 's/:$//'|grep -v lo | sort -n -r`
for tmp1 in $ALLUP_NICS; do
dhclient -cf /etc/dhclient.conf -pf /var/run/dhclient.$tmp1.pid $tmp1 &
dhclient -6 -pf /var/run/dhclient6.$tmp1.pid $tmp1 -lf /var/lib/dhclient/dhclient6.leases &
#bootnic=$tmp1
#break
done
if [ ! -z "$ALLUP_NICS" ]; then
break
fi
sleep 2
tries=$(($tries+1))
done
logger -s -t $log_label -p local4.info "Acquiring network addresses.."
# Phase 2: pick as bootnic the first NIC that has an IPv4 address (not link-scope, not flagged
# dynamic) and can ping $XCATMASTER.
tries=0
while [ -z "$bootnic" ]; do
for tmp1 in $ALLUP_NICS; do
if ip addr show dev $tmp1|grep -v 'scope link'|grep -v 'dynamic'|grep -v inet6|grep inet > /dev/null; then
result=`ping -c1 -I $tmp1 $XCATMASTER 2>&1`
if [ $? -eq 0 ]; then
logger -s -t $log_label -p local4.info "the nic $tmp1 can ping $XCATMASTER"
bootnic=$tmp1
break
fi
fi
done
sleep 3
tries=$(($tries+1))
# Wait for 60 seconds to make sure the STP is done for at least one port
if [ $tries -ge 20 ]; then
break
fi
done
# No usable NIC was found: hand control to an interactive shell; the script resumes when that shell exits.
if [ -z "$bootnic" ]; then
logger -s -t $log_label -p local4.info "still can not get bootnic, go into /bin/bash"
/bin/bash
fi
else
# bootnic is already known: start IPv4 DHCP on it right away.
dhclient -cf /etc/dhclient.conf -pf /var/run/dhclient.$bootnic.pid $bootnic &
#we'll kick off IPv6 and IPv4 on all nics, but not wait for them to come up unless doing discovery, to reduce
#chances that we'll perform a partial discovery
#in other scenarios where downed non-bootnics cause issues, will rely on retries to fix things up
dhclient -6 -pf /var/run/dhclient6.$bootnic.pid $bootnic -lf /var/lib/dhclient/dhclient6.leases &
# Remaining NICs: everything except loopback, the bootnic, usb devices and links already flagged UP.
NICSTOBRINGUP=`ip link|grep mtu|grep -v LOOPBACK|grep -v $bootnic|grep -v usb|grep -v ,UP|awk -F: '{print $2}'`
export NICSTOBRINGUP
# For each of them, background jobs poll ethtool every 5 seconds until link is detected and then start dhclient (IPv4 and IPv6).
for nic in $NICSTOBRINGUP; do
(while ! ethtool $nic | grep Link\ detected|grep yes > /dev/null; do sleep 5; done; dhclient -cf /etc/dhclient.conf -pf /var/run/dhclient.$nic.pid $nic ) &
(while ! ethtool $nic | grep Link\ detected|grep yes > /dev/null; do sleep 5; done; dhclient -cf /etc/dhclient.conf -6 -pf /var/run/dhclient6.$nic.pid -lf /var/lib/dhclient/dhclient6.leases $nic ) &
done
# Block until the bootnic has an IPv4 address, polling every 0.1s.  The spanning-tree hint is
# logged once, after about 100 polls; the loop itself has no upper bound.
gripeiter=101
logger -s -t $log_label -p local4.info "Acquiring network addresses.."
while ! ip addr show dev $bootnic|grep -v 'scope link'|grep -v 'dynamic'|grep -v inet6|grep inet > /dev/null; do
sleep 0.1
if [ $gripeiter = 1 ]; then
logger -s -t $log_label -p local4.info "It seems to be taking a while to acquire an IPv4 address, you may want to check spanning tree..."
fi
gripeiter=$((gripeiter-1))
done
fi
fi
# Generate the 4096-bit certificate key in the background while the script
# carries on.
openssl genrsa -out /etc/xcat/certkey.pem 4096 > /dev/null 2>&1 &

# Report the boot NIC and print its IPv4 address(es): inet lines that are
# neither link-scope, dynamic, nor inet6.
logger -s -t "$log_label" -p local4.info "Acquired IPv4 address on $bootnic"
ip addr show dev $bootnic | awk '!/scope link/ && !/dynamic/ && !/inet6/ && /inet/ {print $2}'

# One-shot clock sync against the xCAT master.
logger -s -t "$log_label" -p local4.info "Attempting to sync the date with $XCATMASTER..."
ntpdate $XCATMASTER

# When an RTC device exists, copy the system time to it a few seconds later
# from a detached background job.
if [[ -e /dev/rtc ]]; then
  logger -s -t "$log_label" -p local4.info "Attempting to sync hardware clock..."
  { sleep 8; hwclock --systohc; } </dev/null >/dev/null 2>&1 &
  disown
fi
# Restart rsyslogd: signal the instance recorded in the pid file, wait for it
# to go away, then start a fresh one with version-appropriate options.
logger -s -t "$log_label" -p local4.info "Restarting syslog..."
read -r RSYSLOG_PID </var/run/syslogd.pid 2>/dev/null
kill "$RSYSLOG_PID" 2>/dev/null
# Poll every half second for as long as the old process still exists.
until ! kill -0 "$RSYSLOG_PID" 2>/dev/null; do
  sleep 0.5
done
unset RSYSLOG_PID

# Major version number of rsyslogd.
RSYSLOGD_VERSION=$(rsyslogd -v | awk '/rsyslogd/ { split($2, a, "."); print a[1]; }')
# Anything that does not test as version >= 8 gets the legacy -c4 invocation.
if ! [ "$RSYSLOGD_VERSION" -ge 8 ]; then
  /sbin/rsyslogd -c4
else
  /sbin/rsyslogd
fi
# Load the IPMI driver matching the machine architecture, then probe whether
# a BMC answers; IPMI_SUPPORT is 1 when `ipmitool mc info` succeeds, else 0.
HOST_ARCH=$(uname -m)
# grep's output is not suppressed, so a matching architecture string is echoed.
if grep "ppc64" <<< "$HOST_ARCH"; then
  modprobe ipmi_powernv
else
  modprobe ipmi_si
fi
modprobe ipmi_devintf

ipmitool mc info >/dev/null 2>&1
IPMI_RC=$?
if [ $IPMI_RC -eq 0 ]; then
  IPMI_SUPPORT=1
else
  IPMI_SUPPORT=0
fi

# Export the boot NIC as DEVICE for child processes.
export DEVICE=$bootnic
# Unless the initial destiny is "discover", request a certificate from the
# xCAT master up front (we aren't discovering, so we probably can and should).
[ "$destiny" = "discover" ] || {
  logger -s -t "$log_label" -p local4.info "Getting initial certificate --> $XCATMASTER:$XCATPORT"
  /bin/getcert $XCATMASTER:$XCATPORT
}
# Main dispatch loop: obtain a "destiny" directive (initially from the environment, afterwards
# from the xCAT master) and act on it.  The loop contains no break; it ends only when a branch
# reboots or powers off the node.
while :; do
# Ask xCAT for a destiny when none is pending, or when the pending one contains "configraid".
grepconfigraid=`echo $destiny|grep "configraid"`
if [ -z "$destiny" -o -n "$grepconfigraid" ]; then
logger -s -t $log_label -p local4.info "Running getdestiny --> $XCATMASTER:$XCATPORT"
destiny=`getdestiny $XCATMASTER:$XCATPORT`
logger -s -t $log_label -p local4.info "Received destiny=$destiny"
fi
# parse out some values from the destiny
# destparameter: everything after the first '=' (cut passes the text through unchanged when there is no '=').
# destiny: the part before the first '='.  dest: its first whitespace-separated word.
destparameter=`echo $destiny|cut -d '=' -f 2-`
destiny=`echo $destiny|awk -F= '{print $1}'`
dest=`echo $destiny|awk '{print $1}'` #could probably use bash but oh well
logger -s -t $log_label -p local4.info "The destiny=$dest, destiny parameters=$destparameter"
if [ "$dest" = "discover" ]; then #skip a query to xCAT when /proc/cmdline will do
# discover: run hardware discovery, fetch a certificate, then ask for a new destiny next iteration.
logger -s -t $log_label -p local4.info "Running dodiscovery..."
/bin/dodiscovery
logger -s -t $log_label -p local4.info "dodiscovery - Complete."
logger -s -t $log_label -p local4.info "Getting certificate --> $XCATMASTER:$XCATPORT"
/bin/getcert $XCATMASTER:$XCATPORT
destiny=''
dest=''
# Remembered so the standby branch below can log a more specific message.
XCAT_DISCOVERY_WAS_DONE=1
export XCAT_DISCOVERY_WAS_DONE
elif [ "$dest" = shell ]; then
# shell: interactive debug shell; once it exits, run nextdestiny and loop.
logger -s -t $log_label -p local4.info "Dropping to debug shell(exit to run next destiny)..."
destiny=''
dest=''
/bin/bash
logger -s -t $log_label -p local4.info "Exited shell."
logger -s -t $log_label -p local4.info "Running nextdestiny $XCATMASTER:$XCATPORT..."
/bin/nextdestiny $XCATMASTER:$XCATPORT
logger -s -t $log_label -p local4.info "nextdestiny - Complete."
elif [ "$dest" = runcmd ]; then
# runcmd: the output of nextdestiny becomes the pending destiny, then the received parameter
# string is executed as a command (unquoted, so it is word-split by the shell).
logger -s -t $log_label -p local4.info "Running nextdestiny $XCATMASTER:$XCATPORT..."
destiny=`/bin/nextdestiny $XCATMASTER:$XCATPORT`
dest=`echo $destiny|awk -F= '{print $1}'`
$destparameter
logger -s -t $log_label -p local4.info "nextdestiny - Complete."
elif [ "$dest" = runimage ]; then
# runimage: download the archive named by the parameter into /tmp/<basename>, unpack it and run its runme.sh.
logger -s -t $log_label -p local4.info "Running nextdestiny $XCATMASTER:$XCATPORT..."
destiny=`/bin/nextdestiny $XCATMASTER:$XCATPORT`
dest=`echo $destiny|awk -F= '{print $1}'`
logger -s -t $log_label -p local4.info "nextdestiny - Complete."
mkdir /tmp/`basename $destparameter`
cd /tmp/`basename $destparameter`
# NOTE(review): eval re-expands the parameter string received from the server (e.g. variable references inside it); it is trusted input here.
eval destparameter=$destparameter
ERROR=`wget $destparameter 2>&1`
rc=$?
if [ $rc -ne 0 ]; then
logger -s -t $log_label -p local4.err "Failed to wget $destparameter, $ERROR"
fi
# Retry every 10 seconds, resuming the partial download (-c), while wget keeps failing and its
# output has a line without "416"; grep -v also prints that output to stdout.
while [ $rc -ne 0 ] && echo $ERROR|grep -v 416; do
sleep 10
ERROR=`wget -c $destparameter 2>&1`
rc=$?
if [ $rc -ne 0 ]; then
logger -s -t $log_label -p local4.err "Failed to wget $destparameter, $ERROR"
fi
done
tar xvf `basename $destparameter`
./runme.sh
cd -
elif [ "$dest" = "reboot" -o "$dest" = "boot" ]; then
# reboot/boot: run nextdestiny, request PXE as boot device when IPMI is available, then force a reboot.
logger -s -t $log_label -p local4.info "Running nextdestiny $XCATMASTER:$XCATPORT..."
/bin/nextdestiny $XCATMASTER:$XCATPORT
logger -s -t $log_label -p local4.info "nextdestiny - Complete."
if [ $IPMI_SUPPORT -ne 0 ]; then
ipmitool chassis bootdev pxe
fi
reboot -f
elif [ "$dest" = "install" -o "$dest" = "netboot" ]; then
# install/netboot: same as above but without calling nextdestiny first.
if [ $IPMI_SUPPORT -ne 0 ]; then
ipmitool chassis bootdev pxe
fi
logger -s -t $log_label -p local4.info "Reboot..."
reboot -f
elif [ "$dest" = sysclone ]; then
# sysclone: run dosysclone, then ask for a new destiny.
logger -s -t $log_label -p local4.info "Running dosysclone..."
/bin/dosysclone
logger -s -t $log_label -p local4.info "dosysclone - Complete."
destiny=''
dest=''
elif [ "$dest" = standby ]; then
# standby: wait a random 30-299 seconds before asking again, logging a reminder every 10 seconds.
delay=$((30+$RANDOM%270))
while [ $delay -gt 0 ]; do
if [ $((delay%10)) == 0 ]; then
if [ "$XCAT_DISCOVERY_WAS_DONE" == 1 ]; then
logger -s -t $log_label -p local4.info "Received request=$dest, will call xCAT back in $delay seconds. Discovery is complete, run nodeset on this node to provision an Operating System"
else
logger -s -t $log_label -p local4.info "Received request to retry in a bit, will call xCAT back in $delay seconds"
fi
fi
delay=$((delay-1))
sleep 1
done
destiny=''
dest=''
echo "Retrying ";
elif [ "$dest" = shutdown ]; then
# shutdown: force power off.
logger -s -t $log_label -p local4.info "Poweroff..."
poweroff -f
else
# Anything else: log it, then back off for a random 30-299 seconds before asking xCAT again.
logger -s -t $log_label -p local4.err "Unrecognized directive (dest=$dest)"
destiny=''
dest=''
delay=$((30+$RANDOM%270))
while [ $delay -gt 0 ]; do
if [ $((delay%10)) == 0 ]; then
logger -s -t $log_label -p local4.info "... Will retry xCAT in $delay seconds"
fi
delay=$((delay-1))
sleep 1
done
fi
done
# NOTE(review): the `while :` loop directly above has no break, so these two lines appear to be
# unreachable as written — confirm before relying on them.
logger -s -t $log_label -p local4.info "doxcat is complete"
# Turns shell command tracing off; no matching `set -x` is visible in this part of the file.
set +x