#!/usr/bin/env perl

# Provenance (from the patch this script was delivered in):
#   commit 9bd662e82a35544479302462b5d28f7881ab13a4
#   From: daniceexi   Date: Sun, 30 Jun 2013 04:28:19 +0000
#   Subject: Code drop for Xeon Phi (mic) support. Run on host node to
#            configure the mic (network,hostname,onboot,vlog)
#   git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@16835
#               8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
#   file: xCAT-server/sbin/configmic (new file, mode 100755)

# This script is used to configure the mics on the host.
# This script is run by xdsh from MN/SN to the host.
# Parameters:
#   -m xcatmaster
#   -p the path of the mic configuration file. Generally, it's
#      /tftpboot/xcat/miccfg/miccfg.hostname
#
# Overall flow:
#   1. work out this host's node name as seen from the master
#   2. download and parse miccfg.<nodename> from the master
#   3. make sure the image directory of the master is mounted on $micmnt
#   4. stop mpss, reset and reconfigure every mic, regenerate its ramfs
#   5. start mpss again and report "MICMSG:<name>: Done" for every mic
#
# Every failure is reported through outputmsg()/runsyscmd() with a step
# specific exit code.
# NOTE(review): exit codes 300 and 301 do not fit in one byte; the usual
# 8-bit exit status would truncate them - confirm what the caller expects.

use strict;
use IO::Socket;    # also brings inet_aton/inet_ntoa into scope

use File::Path;
use File::Copy;
use Getopt::Long;

# enable the autoflush of stdout
select STDOUT;
$| = 1;

my $tmppath = "/tmp/mictmp";
my $logpath = "/var/log/xcat/";
my $logfile = "$logpath/configmic.log";
my $micmnt  = "/opt/intel/mic/mnt";

mkpath $tmppath;
mkpath $micmnt;
# the log directory must exist before the log file can be opened
mkpath $logpath;

# open the log file (shared with runsyscmd/outputmsg below)
open(my $logfh, '>>', $logfile) or die "Error: cannot open $logfile\n";
print $logfh "\n\n====================================================\nStart mic configuratoin: ".`date`."\n";

my ($master, $cfgpath);
GetOptions('m=s' => \$master, 'p=s' => \$cfgpath);
unless ($master && $cfgpath) {
    outputmsg("Error: the -m master and -p path arguments must be specified for configmic.\n", 1);
}

# get the correct host name for the host:
# resolve the master, find the local source address used to reach it and
# look that address up again to get the name(s) of this host
my $nodename;
my $masterip = `getent hosts $master | awk {'print \$1'}`;
chomp($masterip);
my $myip = `ip route get $masterip| head -n 1 | sed 's/^.*src//g' | awk {'print \$1'}`;
chomp($myip);    # do not leak the trailing newline into the next command
my $myipinfo = `getent hosts $myip`;
if ($myipinfo =~ /([^\s]+)\s+([^\s]+)\s+([^\s]+)/) {
    # address plus two names: use the shorter of the two names
    my $n1 = $2;
    my $n2 = $3;
    if (length($n1) > length($n2)) {
        $nodename = $n2;
    } else {
        $nodename = $n1;
    }
} elsif ($myipinfo =~ /([^\s]+)\s+([^\s]+)/) {
    $nodename = $2;
} else {
    outputmsg("Error: cannot get the hostname of the host node\n", 2);
}

# download the mic configuration file from master
my $cmd = "wget -N --waitretry=10 --random-wait -T 60 http://$master/$cfgpath/miccfg.$nodename -P $tmppath";
my ($rc, $output) = runsyscmd($cmd, "Error: failed to download mic configuration file from $master\n", 3);

# these two checks only report an error; they must not be handed to
# runsyscmd, which would try to execute the message as a shell command
unless (-r "$tmppath/miccfg.$nodename") {
    outputmsg("Error: cannot get the mic configuration file from http://$master/$cfgpath/miccfg.$nodename\n", 4);
}

# parse the configuration file
my $cfgfh;
unless (open($cfgfh, '<', "$tmppath/miccfg.$nodename")) {
    outputmsg("Error: cannot open $tmppath/miccfg.$nodename\n", 5);
}

# the configuration file should have the following format
#miclist=mic0
#0:ip=10.10.10.1|br=mybr0|name=host1-mic0|onboot=yes|vlog=no
#imgpath=/install/mpss3.1
#overlay=ol1

my %miccfg;          # device id => { ip, br, name, onboot, vlog }
my $miclist = '';
my $overlay = '';
my $ospath  = '';
while (my $line = <$cfgfh>) {
    if ($line =~ /(\d+):(.*)/) {
        my $deviceid = $1;
        my @params = split(/\|/, $2);
        foreach my $param (@params) {
            # limit 2 keeps a value intact even if it contains '='
            my ($n, $v) = split(/=/, $param, 2);
            $miccfg{$deviceid}{$n} = $v;
        }
    } elsif ($line =~ /^miclist=(.*)/) {
        $miclist = $1;
    } elsif ($line =~ /^overlay=(.*)/) {
        $overlay = $1;
    } elsif ($line =~ /^imgpath=(.*)/) {
        $ospath = $1;
    }
}
close($cfgfh);

# micctrl takes the mic names separated by blanks
$miclist =~ s/,/ /g;

# add the mount entry for mounting of root fs from master to /etc/fstab
# e.g. mount $master:/install/mpss3 /opt/intel/mic/mnt
# The pattern is quoted so that grep searches for the whole entry in
# /etc/fstab instead of treating $micmnt as a second file to search.
$cmd = "grep -F \"$master:$ospath $micmnt\" /etc/fstab";
($rc, $output) = runsyscmd($cmd);
if ($rc) {
    # not found the exact mount entry
    $cmd = "grep $micmnt /etc/fstab";
    ($rc, $output) = runsyscmd($cmd);
    if (!$rc) {
        # found the mount to $micmnt with another master or directory,
        # remove the entry and umount it
        my $trans = $micmnt;
        $trans =~ s/\//\\\//g;
        $cmd = "sed \"/$trans/d\" /etc/fstab > $tmppath/fstab.tmp";
        runsyscmd($cmd, "Error: failed to configure fstab.\n", 6);
        copy("$tmppath/fstab.tmp", "/etc/fstab")
            or outputmsg("Error: failed to configure fstab.\n", 6);
        $cmd = "umount -l -f $micmnt";
        runsyscmd($cmd, "Error: failed to run umount -l -f $micmnt\n", 7);
    }
    $cmd = "echo \"$master:$ospath $micmnt nfs timeo=14,intr 1 2\" >>/etc/fstab";
    runsyscmd($cmd);
    $cmd = "mount -a";
    runsyscmd($cmd);
}

# make sure the remote files are accessible
unless (-r "$micmnt/opt/intel/mic/filesystem/base.filelist") {
    outputmsg("Error: cannot access the $micmnt/opt/intel/mic/filesystem/base.filelist\n", 8);
}

# start to configure the mic
# stop the mpss service first
$cmd = "service mpss stop";
runsyscmd($cmd, "Error: failed to stop mpss service.\n", 100);

# make sure the mpss has been stopped (poll up to 5 times, 2s apart)
my $i = 5;
while ($i > 0) {
    $cmd = "service mpss status";
    ($rc, $output) = runsyscmd($cmd);
    if (grep /mpss is stopped/, @$output) {
        last;
    }
    sleep 2;
    $i--;
}

# remove the mic configuration file
unlink("/etc/sysconfig/mic/default.conf");
foreach my $mic (split(/ /, $miclist)) {
    unlink("/etc/sysconfig/mic/$mic.conf");
    unlink("/opt/intel/mic/filesystem/$mic.filelist");
    rmtree("/opt/intel/mic/filesystem/$mic");
}

# reset the configuration to default
$cmd = "micctrl --initdefaults $miclist";
runsyscmd($cmd, "Error: failed to initiate the mic devices.\n", 200);

# configure the base dir
$cmd = "micctrl --basedir=/opt/intel/mic/mnt/opt/intel/mic/filesystem/base --list=/opt/intel/mic/mnt/opt/intel/mic/filesystem/base.filelist $miclist";
runsyscmd($cmd, "Error: failed to change the base dir for mic file system..\n", 101);

# configure the commondir
$cmd = "micctrl --commondir=/opt/intel/mic/mnt/opt/intel/mic/filesystem/common --list=/opt/intel/mic/mnt/opt/intel/mic/filesystem/common.filelist $miclist";
runsyscmd($cmd, "Error: failed to change the common dir for mic file system.\n", 102);

# configure the overlay
my @ols = split(/,/, $overlay);
foreach my $ol (@ols) {
    $cmd = "micctrl --overlay=filelist --state=on --source=/opt/intel/mic/mnt/opt/intel/mic/filesystem/overlay/$ol --target=/opt/intel/mic/mnt/opt/intel/mic/filesystem/overlay/$ol.filelist $miclist";
    runsyscmd($cmd, "Error: failed to change the overlay dir for mic file system.\n", 103);
}

# do the mic specific configuration
# the address and route tables are read once and searched for every mic
$cmd = "ip -4 addr show";
($rc, $output) = runsyscmd($cmd);
$cmd = "ip -4 route show";
my ($rc2, $output2) = runsyscmd($cmd);
foreach my $micid (sort keys %miccfg) {
    my $micname = $miccfg{$micid}{'name'};

    # set the boot device to be staticramfs so that the osimage doesn't
    # need to be generated for every boot
    $cmd = "micctrl --rootdev=StaticRamFS --target=/opt/intel/mic/filesystem/$micname.image mic$micid";
    runsyscmd($cmd, "Error: failed to set root image for mic.\n", 104);

    # set the linux kernel location
    $cmd = "micctrl --osimage=$micmnt/lib/firmware/mic/uos.img mic$micid";
    runsyscmd($cmd, "Error: failed to linux kernle location for mic.\n", 105);

    # set the autoboot; when onboot is neither yes nor no nothing is run
    # (previously the stale --osimage command was executed a second time)
    my $onboot = defined $miccfg{$micid}{'onboot'} ? $miccfg{$micid}{'onboot'} : '';
    my $autoboot;
    if ($onboot =~ /no/i) {
        $autoboot = 'no';
    } elsif ($onboot =~ /yes/i) {
        $autoboot = 'yes';
    }
    if (defined $autoboot) {
        $cmd = "micctrl --autoboot=$autoboot mic$micid";
        runsyscmd($cmd, "Error: failed to set the autoboot for mic.\n", 106);
    }

    # set the hostname: rewrite the conf into the tmp dir, then copy back
    $cmd = "sed \"s/Hostname .*/Hostname \"$micname\"/\" /etc/sysconfig/mic/mic$micid.conf > $tmppath/mic$micid.conf";
    runsyscmd($cmd, "Error: failed to set hostname for mic.\n", 107);
    copy("$tmppath/mic$micid.conf", "/etc/sysconfig/mic/mic$micid.conf")
        or outputmsg("Error: failed to set hostname for mic.\n", 107);

    # configure the Verbose log
    my $vlog = defined $miccfg{$micid}{'vlog'} ? $miccfg{$micid}{'vlog'} : '';
    if ($vlog =~ /yes/i) {
        $cmd = "sed \"s/VerboseLogging .*/VerboseLogging \"Enabled\"/\" /etc/sysconfig/mic/mic$micid.conf > $tmppath/mic$micid.conf";
        runsyscmd($cmd, "Error: failed to set Verbose log for mic.\n", 108);
        copy("$tmppath/mic$micid.conf", "/etc/sysconfig/mic/mic$micid.conf")
            or outputmsg("Error: failed to set Verbose log for mic.\n", 108);
    }

    # configure the bridge and ip for the mic
    # get the ip of the bridge
    my $brg = defined $miccfg{$micid}{br} ? $miccfg{$micid}{br} : '';
    my ($brip, $netbit, $brc, $mtu);
    foreach my $addrline (@$output) {
        # \Q..\E: the bridge name is data, not a pattern; (?!\S) makes
        # sure e.g. "br0" does not match the address line of "br01"
        if ($addrline =~ /inet\s+([\d\.]+)\/(\d+)\s+brd\s+([\d\.]+) scope global \Q$brg\E(?!\S)/) {
            $brip   = $1;
            $netbit = $2;
            $brc    = $3;
            last;
        } elsif ($addrline =~ /\d+:\s+\Q$brg\E:.*mtu\s+(\d+)/) {
            # the interface header line precedes its inet line
            $mtu = $1;
        }
    }

    unless ($brip && $netbit && $brc) {
        outputmsg("Error: failed to get ip for the bridge $brg.\n", 110);
    }
    $mtu = '' unless defined $mtu;

    # add the bridge to mic configuration
    # since the micctrl --addbridge= --type=external --ip= --netbits=8 does not
    # work with '--netbis=8', the bridge configuration has been done by
    # changing cfg file directly.
    $cmd = "echo \"Bridge $brg External $brip $netbit $mtu\" >> /etc/sysconfig/mic/default.conf";
    runsyscmd($cmd);

    # configure network for each mic
    $cmd = "micctrl --network=static --bridge=".$brg." --ip=".$miccfg{$micid}{ip}." mic$micid";
    runsyscmd($cmd, "Error: failed to generate IP configuration for mic.\n", 104);

    # take the configuration to effect
    $cmd = "micctrl --resetconfig mic$micid";
    runsyscmd($cmd, "Error: failed to spread the configuration.\n", 201);

    # get the gateway of the bridge:
    # compute the network address of the bridge and take the "src" address
    # of the matching route entry
    my $netmask = (2**$netbit - 1) << (32 - $netbit);
    my $brnet = unpack("N", inet_aton($brip));
    $brnet &= $netmask;
    $brnet = inet_ntoa(pack("N", $brnet));
    my $brgw;
    foreach my $routeline (@$output2) {
        if ($routeline =~ /\Q$brnet\E\/$netbit\s+dev\s+\Q$brg\E.*src\s+([\d\.]+)/) {
            $brgw = $1;
            last;
        }
    }
    $brgw = '' unless defined $brgw;

    # configure the mic interface to the real interface cfg file instead of
    # mic cfg file; this is also because of the defect of the intel tool so
    # that we have to do it separately
    my @ifcfg = ("IPADDR=$miccfg{$micid}{ip}\n", "GATEWAY=$brgw\n", "PREFIX=$netbit\n");
    my $ifcfgfh;
    unless (open($ifcfgfh, '>', "/opt/intel/mic/filesystem/mic$micid/etc/sysconfig/network/ifcfg-mic0")) {
        outputmsg("Error: cannot open ifcfg-mic$micid\n", 111);
    }
    print $ifcfgfh @ifcfg;
    close($ifcfgfh);

    # copy the system files which generated by genimage to the micdir
    # e.g. /etc/hosts /etc/passwd ...
    my $src = "$micmnt/opt/intel/mic/filesystem/overlay/system/*";
    my $dst = "/opt/intel/mic/filesystem/mic$micid";
    $cmd = "/bin/cp -rf $src $dst";
    runsyscmd($cmd, "Error: failed to copy the overlay dir.\n", 300);

    # generate the static root file system in ramdisk format
    $cmd = "micctrl --updateramfs mic$micid";
    runsyscmd($cmd, "Error: failed to generate the static ramfs.\n", 301);
}

# start the mpss service after the configuration
$cmd = "service mpss start >/dev/null 2>&1";
system($cmd);

# wait until mpss reports that it is running (up to 5 polls, 2s apart)
# NOTE(review): a failing "service mpss status" exits with code 100 here
# instead of being retried - confirm that this is intended.
$i = 5;
while ($i > 0) {
    $cmd = "service mpss status";
    ($rc, $output) = runsyscmd($cmd, "Error: failed to get the status of mpss.\n", 100);
    if (grep /mpss is running/, @$output) {
        last;
    }
    sleep 2;
    $i--;
}

# notify the nodeset command that the configuration has been done
foreach my $micid (sort keys %miccfg) {
    outputmsg("MICMSG:$miccfg{$micid}{'name'}: Done\n");
}

print $logfh "mpss has been started\n";
print $logfh "\nFinish the mic configuratoin: ".`date`."====================================================\n";

close($logfh);

# since the start of mpss service created a new process and for some reason
# the process caused the hang of sshd root@notty on the host (that means the
# xdsh cannot get connection closed from host), a work around is to kill the
# sshd connection by force
$cmd = "ps -ef | grep sshd | grep notty |awk -F\' \' \'{print \$2}\'";
($rc, $output) = runsyscmd($cmd);
foreach my $pid (@$output) {
    chomp($pid);
    next unless $pid =~ /^\d+$/;    # only signal real process ids
    kill 15, $pid;
}

exit 0;

# Run a shell command, log the command and its output.
#   $cmd    - the command line; "2>&1" is appended unless it already ends
#             with it, so stderr is captured together with stdout
#   $errmsg - message reported when the command fails (only used with $rc)
#   $rc     - exit code of this script when the command fails; when it is
#             missing or 0 a failure is only returned to the caller
# Returns ($errcode, \@output): $errcode is 0 on success, the exit code of
# the command on failure, 128+signal if the command was killed by a signal
# and 255 if it could not be started at all.
sub runsyscmd {
    my ($cmd, $errmsg, $rc) = @_;

    print $logfh "---------------------------------------------\n";
    print $logfh "Run command: $cmd\n";

    if (!($cmd =~ /2>&1$/)) { $cmd .= ' 2>&1'; }
    my @output = `$cmd`;

    # decode $? completely: a plain ">> 8" reports a command that died
    # from a signal as success
    my $errcode = 0;
    if ($? == -1) {
        $errcode = 255;
    } elsif ($? & 127) {
        $errcode = 128 + ($? & 127);
    } elsif ($?) {
        $errcode = $? >> 8;
    }

    foreach my $outline (@output) {
        print $logfh $outline;
    }
    print $logfh "---------------------------------------------\n";

    if ($rc && $errcode) {
        outputmsg($errmsg, $rc);
        exit $rc;
    }
    return ($errcode, \@output);
}

# Write a message to the log file and to stdout.
#   $msg - the message text
#   $rc  - optional; when true the script exits with this code
sub outputmsg {
    my ($msg, $rc) = @_;
    print $logfh $msg;
    print $msg;
    if ($rc) {
        exit $rc;
    }
    return;
}