2
0
mirror of https://github.com/xcat2/xcat-core.git synced 2025-10-26 17:05:33 +00:00
Files
xcat-core/xCAT-genesis-scripts/usr/bin/dodiscovery
Bin Xu b962479353 Fix the issue: Failed to discover nodes as the discovery method is set to udef by wrong (#4860)
- zvmdiscovery plugin will not handle findme
 - not change request if no temp discovered bmc nodes, to avoid the confusing error message
 - add more logs, and ignore the `ipmitool sol info` error output
2018-03-01 15:20:25 +08:00

310 lines
14 KiB
Bash
Executable File

#!/bin/bash
log_label="xcat.genesis.dodiscovery"
#the time when the latest findme request is sent to master
reqtime=0
#the timeout value on the waiting for the "processed" response from master
#when the xcatd on master finished the processing of my findme request,
#a "processed" response will be replied
maxresptime=180
minixcatd.awk &
PUBKEY=`openssl rsa -in /etc/xcat/privkey.pem -pubout 2> /dev/null|grep -v "PUBLIC KEY"`
PUBKEY=`echo $PUBKEY|sed -e 's/ //g'`
export PUBKEY
logger -s -t $log_label -p local4.info "Beginning node discovery process..."
for nic in `ip link|grep mtu|grep -v LOOPBACK|grep -v usb|grep -v ,LOWER_UP|awk -F: '{print $2}'`; do
ip link set $nic up
done
waitforlink=100
while [ ! -z "$NICSTOBRINGUP" -a $waitforlink -gt 0 ]; do
NICSTOBRINGUP=`ip link|grep mtu|grep -v LOOPBACK|grep -v usb|grep -v ,LOWER_UP|awk -F: '{print $2}'`
waitforlink=$((waitforlink - 1))
sleep 0.1
if [ $waitforlink = 1 ]; then
logger -s -t $log_label -p local4.warning "No link detected on $NICSTOBRINGUP after 10 seconds"
fi
done
NICSGETTINGADDR=`ip link|grep mtu|grep -v LOOPBACK|grep -v usb|grep ,LOWER_UP|awk -F: '{print $2}'`
timewaiting=0
logger -s -t $log_label -p local4.info "Waiting for nics to get addresses"
while [ ! -z "$NICSGETTINGADDR" -a $timewaiting != 700 ]; do
NEWNICSGETTINGADDR=""
for nic in $NICSGETTINGADDR; do
if ! ip -4 -o a show dev $nic |grep -q inet; then
NEWNICSGETTINGADDR="$NEWNICSGETTINGADDR $nic"
else
echo -n "$nic|"
ip -4 -o addr show dev $nic | awk '{print $4}' | sed -e sX/.*XX
fi
done
sleep 0.1
timewaiting=$((timewaiting+1))
if [ $timewaiting = 699 ]; then
logger -s -t $log_label -p local4.warning "No DHCP answer for $nic, ignoring interface..."
fi
NICSGETTINGADDR=$NEWNICSGETTINGADDR
done
if [ $timewaiting != 700 -a $timewaiting -gt 450 ]; then
logger -s -t $log_label -p local4.warning "Obtained an IP address $NICSGETTINGADDR but it took $timewaiting cycles, you may want to check the spanning tree configuration in the switch."
fi
logger -s -t $log_label -p local4.info "Network configuration complete, commencing transmit of discovery packets"
read XCATMASTER XCATPORT < <(grep xcatd= /proc/cmdline| sed 's/.*xcatd=\([^ ]*\).*/\1/' |tr ':' ' ')
if [[ -z $XCATPORT ]]; then
XCATPORT=3001
fi
export XCATPORT
#time to make our packet...
while [ ! -r /restart ]; do
#record the current time
curtime=$(date +%s)
#the existence of "/processing" indicates that my findme request is under processing
if [ -f "/processing" ]; then
if [ $curtime -gt $((reqtime+maxresptime)) ]; then
#I think my findme request processing is timeout, I will resend the findme request
logger -s -t $log_label -p local4.info "seems the processing of my findme request cost more than $maxresptime, send new findme request"
rm -rf /processing
else
logger -s -t $log_label -p local4.info "My findme request is still under processing, do not send new request"
sleep 5
fi
continue
fi
MTM=unknown
SERIAL=unknown
ARCH=unknown
ARCH=`uname -m` #32-bit only is old news
PLATFORM=unknown
if [ -r /sys/devices/virtual/dmi/id/product_name ]; then #x86
PRODNAME=`cat /sys/devices/virtual/dmi/id/product_name`
IAMAVM=0
if [ "$PRODNAME" = "KVM" ]; then
IAMAVM=1
MTM=KVM
elif [ "$PRODNAME" = "VMware Virtual Platform" ]; then
IAMAVM=1
MTM=VMware
else
MTM=`cat /sys/devices/virtual/dmi/id/product_name|awk -F'[' '{print $2}'|awk -F']' '{print $1}'`
if [ -z "$MTM" ]; then
FRU=`ipmitool fru print 0`
if [ $? -eq 0 ]; then
MTM=`echo "$FRU" | awk -F': ' '/Product Manufacturer/ {m=$2} /Product Name|Product Part Number/ {if (n==""||n=="NONE") {n=$2}} END {print m":"n}'`
fi
if [ -z "$MTM" -o "$MTM" == ":" ]; then
logger -s -t $log_label -p local4.warning "Couldn't find MTM information in FRU, falling back to DMI (MTMS-based discovery may fail)"
m=`cat /sys/devices/virtual/dmi/id/sys_vendor`
n=`cat /sys/devices/virtual/dmi/id/product_name`
MTM="$m:$n"
fi
fi
SERIAL=`cat /sys/devices/virtual/dmi/id/product_serial`
fi
CPUCOUNT=`cat /proc/cpuinfo |grep "model name"|wc -l`
grep "model name" /proc/cpuinfo | while read line; do #to avoid pulling in tail, we do a goofy thing
echo $line > /tmp/cpumod
done
CPUTYPE=`cat /tmp/cpumod|awk -F':' '{print $2}'|sed -e 's/^ //'`
UUID=`sed -e 's/\(..\)\(..\)\(..\)\(..\)-\(..\)\(..\)-\(..\)\(..\)/\4\3\2\1-\6\5-\8\7/' /sys/devices/virtual/dmi/id/product_uuid`
elif [ -r /proc/device-tree/model ]; then #POWER
MTM=`cat /proc/device-tree/model -vT | sed -e 's/^.*,//' | sed -e 's/^[\t ]*//'| sed -e 's/[\t ]*\^@//'`
CPUCOUNT=`cat /proc/cpuinfo |grep -e "^cpu\s*:"|wc -l`
PLATFORM=`cat /proc/cpuinfo | grep -e "^platform\s*:" | awk '{print \$3}'`
grep -e "^cpu\s*:" /proc/cpuinfo | while read line; do #to avoid pulling in tail, we do a goofy thing
echo $line > /tmp/cpumod
done
CPUTYPE=`cat /tmp/cpumod|awk -F':' '{print $2}'|sed -e 's/^ //'`
SERIAL=`cat /proc/device-tree/system-id -vT | sed -e 's/^.*,//' | sed -e 's/^[\t ]*//'| sed -e 's/[\t ]*\^@//'`
# For POWER servers, the /proc/sys/kernel/random/uuid is changing for each query, so use mtms + mac of first up nic as the UUID
#UUID=`sed -e 's/\(..\)\(..\)\(..\)\(..\)-\(..\)\(..\)-\(..\)\(..\)/\4\3\2\1-\6\5-\8\7/' /proc/sys/kernel/random/uuid`
UUID=unknown
fi
# The MEMORY will look like this: 32868920
MEMORY=`cat /proc/meminfo |grep MemTotal|awk '{printf "%.0fMB\n", $2/1024}'`
# The DISKSIZE will look like this: sda:960GB,sdb:960GB,sdc:480GB,sdd:480GB (sorted by major,minor)
DISKSIZE="$(grep -v name /proc/partitions |sort -g -k 1,2 |awk 'BEGIN{sep=""} /[^0-9]$/{printf("%s%s:%.0fGB", sep, $4, $3/1024^2) ; sep=","}')"
logger -s -t $log_label -p local4.info "Beginning echo information to discovery packet file..."
echo '<xcatrequest>' > /tmp/discopacket
echo "<command>findme</command>" >> /tmp/discopacket
echo "<sequential>1</sequential>" >> /tmp/discopacket
echo "<arch>$ARCH</arch>" >> /tmp/discopacket
if [ "$IAMAVM" = 1 ]; then
echo "<nodetype>virtual</nodetype>" >> /tmp/discopacket
fi
echo "<cpucount>$CPUCOUNT</cpucount>" >> /tmp/discopacket
echo "<cputype>$CPUTYPE</cputype>" >> /tmp/discopacket
echo "<memory>$MEMORY</memory>" >> /tmp/discopacket
echo "<disksize>$DISKSIZE</disksize>" >> /tmp/discopacket
if [ "$UUID" != "unknown" ]; then
echo "<uuid>$UUID</uuid>" >> /tmp/discopacket
fi
flag_mtm=`echo "$MTM" | sed 's/0//g'`
if [ "$flag_mtm" ] && [ "$MTM" != "unknown" ]; then
MTM=`echo $MTM | sed 's/\.//g'`
echo "<mtm>$MTM</mtm>" >> /tmp/discopacket
fi
flag_serial=`echo "$SERIAL" | sed 's/0//g'`
if [ "$flag_serial" ] && [ "$SERIAL" != "unknown" ]; then
SERIAL=`echo $SERIAL | sed 's/\.//g'`
echo "<serial>$SERIAL</serial>" >> /tmp/discopacket
fi
if [ "$PLATFORM" != "unknown" ]; then
echo "<platform>$PLATFORM</platform>" >> /tmp/discopacket
fi
LANCHAN=$(ipmitool sol info 2>/dev/null|awk '/Payload Channel/{print $4}')
IsStatic=`ipmitool lan print $LANCHAN | grep 'IP Address Source' | grep 'Static'`
if [ "$IsStatic" ]; then
BMCIPADDR=`ipmitool lan print $LANCHAN | grep 'IP Address' | grep -v 'IP Address Source' | cut -d ":" -f2 | sed 's/ //'`
if [ "$BMCIPADDR" ]; then
echo "<bmc>$BMCIPADDR</bmc>" >> /tmp/discopacket
fi
fi
BMCMAC=`ipmitool lan print $LANCHAN | grep 'MAC Address' | cut -d ":" -f2-7 | sed 's/ //'`
if [ "$BMCMAC" ]; then
echo "<bmcmac>$BMCMAC</bmcmac>" >> /tmp/discopacket
fi
# Check whether the hardware support in-band BMC configuration with the IPMI device
if [ -r /dev/ipmi0 -o -r /dev/ipmi/0 -o -r /dev/ipmidev/0 ]; then
echo "<bmcinband>1</bmcinband>" >> /tmp/discopacket
fi
#so we have some cases where DMI table explictly records every function number, and some where only first function is enumerated
#so for each onboard device, we record it. If it is also the first function, we'll seek out other function numbers and append
#if that other function number does not show up in its own dmi type 41 record
if [ -f "/usr/sbin/dmidecode" ]; then
for onboard in `dmidecode -t 41|egrep '(Type:|Bus Address)'|grep -A1 Ethernet|grep -v Ethernet|sed -e 's/.*Address: //'`; do
obdevs=("${obdevs[@]}" $onboard)
if [ ${onboard#*.} = 0 ]; then
truncslot=${onboard%.*}
for obslot in `grep $truncslot /sys/class/net/*/device/uevent|sed -e s/.*=//`; do
if ! dmidecode -t 41|grep $obslot > /dev/null; then
obdevs=("${obdevs[@]}" $obslot)
fi
done
fi
done
fi
MAC_OF_FIRST_UP_NIC=unknown
#TODO: downed nics, also examine /sys/bus/pci/*/ for more network class devices that might not have driver suppert
for dev in `ip link|grep -B1 ether|grep UP|awk '{print $2}'|sed -e s/://|grep -v usb0`; do
FIRMDESC=""
ONBOARDINDEX=""
DRIVER=`grep DRIVER /sys/class/net/$dev/device/uevent|awk -F= '{print $2}'`
PCI_SLOT=`grep PCI_SLOT_NAME /sys/class/net/$dev/device/uevent|awk -F= '{print $2}'`
ADDRESS=`ip -4 -o a show dev $dev|awk '/global/{print $4}'`
MAC=`ip link show dev $dev|grep ether|awk '{print $2}'| tr /a-f/ /A-F/`
if [ "$MAC_OF_FIRST_UP_NIC" == "unknown" ]; then
MAC_OF_FIRST_UP_NIC=`echo $MAC | sed -e s/://g`
fi
if [[ ! -z "$PCI_SLOT" && -f "/usr/sbin/dmidecode" ]]; then
SLOTNAME=`dmidecode -t 9|egrep '(Designation|Address)'|grep -B1 $PCI_SLOT|grep Designation|sed -e 's/.*Designation:[ ]*//'`
if [ -z "$SLOTNAME" ]; then #check for on board device
index=1
for s in "${obdevs[@]}"; do
if [ "$s" = "$PCI_SLOT" ]; then
ONBOARDINDEX=$index
break
fi
index=$((index+1))
done
if [ ! -z "$ONBOARDINDEX" ]; then
FIRMDESC=`dmidecode -t 41|egrep '(Designation|Address)' |grep -B 1 $PCI_SLOT|grep Designation|sed -e 's/.*: //'`
if [ -z "$FIRMDESC" ]; then
SHORTPCISLOT=${PCI_SLOT%.*}
FIRMDESC=`dmidecode -t 41|egrep '(Designation|Address)' |grep -B 1 $SHORTPCISLOT|grep Designation|sed -e 's/.*: //'`
fi
SLOTNAME="Onboard Ethernet $ONBOARDINDEX"
fi
fi
fi
echo "<mac>$DRIVER|$dev|$MAC|$ADDRESS</mac>" >> /tmp/discopacket
echo "<nic>" >> /tmp/discopacket
echo " <devname>$dev</devname>" >> /tmp/discopacket
echo " <driver>$DRIVER</driver>" >> /tmp/discopacket
if [ ! -z "$ADDRESS" ]; then
echo " <ip4address>$ADDRESS</ip4address>" >> /tmp/discopacket
fi
echo " <hwaddr>$MAC</hwaddr>" >> /tmp/discopacket
if [ ! -z "$PCI_SLOT" ]; then
echo " <pcidev>$PCI_SLOT</pcidev>" >> /tmp/discopacket
fi
if [ ! -z "$SLOTNAME" ]; then
echo " <location>$SLOTNAME</location>" >> /tmp/discopacket
fi
if [ ! -z "$ONBOARDINDEX" ]; then
echo " <onboardeth>$ONBOARDINDEX</onboardeth>" >> /tmp/discopacket
fi
if [ ! -z "$FIRMDESC" ]; then
echo " <firmdesc>$FIRMDESC</firmdesc>" >> /tmp/discopacket
fi
myswitch=`lldptool -n -i $dev -t -V sysName|grep -v 'System Name TLV'|sed -e 's/^ *//'`
if [[ ! -z "$myswitch" && ! "$myswitch" =~ "Agent instance for device not found" ]]; then
echo " <switchname>$myswitch</switchname>" >> /tmp/discopacket
fi
for switchaddr in `lldptool -i $dev -n -t -V mngAddr|grep IP|sed -e 's/.*:.//'`; do
if [[ "$switchaddr" =~ "Agent instance for device not found" ]]; then break; fi
echo " <switchaddr>$switchaddr</switchaddr>" >> /tmp/discopacket
done
myswitchdesc=`lldptool -n -i $dev -t -V sysDesc|grep -v 'System Description TLV'|sed -e 's/^ *//'`
if [[ ! -z "$myswitchdesc" && ! "$myswitchdesc" =~ "Agent instance for device not found" ]]; then
echo " <switchdesc>$myswitchdesc</switchdesc>" >> /tmp/discopacket
fi
myport=`lldptool -n -i $dev -t -V portDesc|grep -v 'Port Description TLV'|sed -e 's/^ *//'`
if [[ ! -z "$myport" && ! "$myport" =~ "Agent instance for device not found" ]]; then
echo " <switchport>$myport</switchport>" >> /tmp/discopacket
fi
echo "</nic>" >> /tmp/discopacket
done
if [ "$UUID" == "unknown" ]; then
UUID=`echo $MTM-$SERIAL-$MAC_OF_FIRST_UP_NIC | tr /A-Z/ /a-z/`
echo "<uuid>$UUID</uuid>" >> /tmp/discopacket
fi
echo "<xcatpubkey>$PUBKEY</xcatpubkey>" >> /tmp/discopacket #this is not secure to use by itself, switch sourced pubkey for security
echo "<sha512sig>" >> /tmp/discopacket
echo "</sha512sig>" >> /tmp/discopacket
echo "</xcatrequest>" >> /tmp/discopacket
openssl dgst -sha512 -out /tmp/discopacket.sha512 -sign /etc/xcat/privkey.pem /tmp/discopacket
openssl enc -e -a -in /tmp/discopacket.sha512 > /tmp/discopacket.b64sig
cat /tmp/discopacket |while read line; do
if [ "$line" = "</sha512sig>" ]; then
cat /tmp/discopacket.b64sig >> /tmp/discopacket.new
fi
echo $line >> /tmp/discopacket.new
done
mv /tmp/discopacket.new /tmp/discopacket
logger -s -t $log_label -p local4.info "Discovery packet file is ready."
rm -f /tmp/discopacket.gz
gzip -9 /tmp/discopacket
if [ ! -z "$XCATMASTER" ]; then
logger -s -t $log_label -p local4.info "Sending the discovery packet to xCAT ($XCATMASTER:$XCATPORT)..."
(cat /tmp/discopacket.gz | udpcat.awk $XCATMASTER $XCATPORT ) &
fi
for dhcps in `grep dhcp-server /var/lib/dhclient/dhclient.leases|awk '{print $4}'|sed -s 's/;//'`; do
logger -s -t $log_label -p local4.info "Sending the discovery packet to xCAT ($dhcps:$XCATPORT)..."
(cat /tmp/discopacket.gz | udpcat.awk $dhcps $XCATPORT ) &
done
#record the time when the "findme request is sent to master"
reqtime=$(date +%s)
#cat /tmp/discopacket
logger -s -t $log_label -p local4.info "Sleeping 5 seconds..."
sleep 5
done
logger -s -t $log_label -p local4.info "Restart network interfaces..."
/bin/restart