#!/usr/bin/env perl

# This script is used to configure the mics on the host.
# This script is run by xdsh from MN/SN to the host
# parameters
# -m xcatmaster
# -p the path of the mic configuration file. Generally, it's /tftpboot/xcat/miccfg/miccfg.hostname

use strict;
use IO::Socket;

use File::Path;
use File::Copy;
use Getopt::Long;

# enable the autoflush of stdout
select STDOUT;
$| = 1;

# Fixed locations used by the rest of the script.
my $tmppath = "/tmp/mictmp";              # scratch directory for downloaded / rewritten files
my $logpath = "/var/log/xcat/";           # directory holding the log file
my $logfile = "$logpath/configmic.log";
my $micmnt = "/opt/intel/mic/mnt";        # mount point for the directory exported by the master

mkpath $tmppath;
mkpath $micmnt;

# The log directory is not guaranteed to exist; create it first so that the
# open below does not fail just because the directory is missing.
mkpath $logpath unless -d $logpath;

#open the log file
# LOG stays a package filehandle because runsyscmd() and outputmsg() print to it.
open (LOG, '>>', $logfile) or die "Error: cannot open $logfile: $!\n";
print LOG "\n\n====================================================\nStart mic configuratoin: ".`date`."\n";

# Both -m (master) and -p (configuration path) are mandatory.
my ($master, $cfgpath);
GetOptions ('m=s'=>\$master, 'p=s'=>\$cfgpath);
unless ($master && $cfgpath) {
    # outputmsg() exits with the given code (1) after logging the message.
    outputmsg("Error: the -m master and -p path arguments must be specified for configmic.\n", 1);
}

# get the correct host name for the host
# Steps: resolve the master to an address, ask the kernel which local source
# address is used to reach it, then look that address up with getent to obtain
# the name(s) this host is known by.
my $masterip = `getent hosts $master | awk {'print \$1'}`;
chomp($masterip);
my $myip = `ip route get $masterip| head -n 1 | sed 's/^.*src//g' | awk {'print \$1'}`;
my $myipinfo =`getent hosts $myip`;

my $nodename;
if (my @fields = $myipinfo =~ /([^\s]+)\s+([^\s]+)\s+([^\s]+)/) {
    # Two names are listed after the address: keep the shorter one
    # (the first one wins when both have the same length).
    my ($first, $second) = @fields[1, 2];
    $nodename = (length($first) > length($second)) ? $second : $first;
}
elsif ($myipinfo =~ /([^\s]+)\s+([^\s]+)/) {
    # Only one name is listed after the address.
    $nodename = $2;
}
else {
    outputmsg("Error: cannot get the hostname of the host node\n", 2);
}

# download the mic configuration file from master
my $cmd = "wget -N --waitretry=10 --random-wait -T 60 http://$master/$cfgpath/miccfg.$nodename -P $tmppath";
my ($rc, $output) = runsyscmd ($cmd, "Error: failed to download mic configuration file from $master\n", 3);

# These two failures must be reported through outputmsg(), which logs the
# message and exits with the given code. Passing the text to runsyscmd()
# would try to execute the message as a shell command and never exit.
unless (-r "$tmppath/miccfg.$nodename") {
    outputmsg ("Error: cannot get the mic configuration file from http://$master/$cfgpath/miccfg.$nodename\n", 4);
}

# parse the configuration file
my $cfgfh;
unless (open ($cfgfh, '<', "$tmppath/miccfg.$nodename")) {
    outputmsg ("Error: cannot open $tmppath/miccfg.$nodename\n", 5);
}

# the configureation file should have the following format
#miclist=mic0
#0:ip=10.10.10.1|br=mybr0|name=host1-mic0|onboot=yes|vlog=no
#imgpath=/install/mpss3.1
#overlay=ol1

my %miccfg;     # device id => { parameter name => value }
my $miclist;    # value of the miclist= line
my $overlay;    # value of the overlay= line
my $ospath;     # value of the imgpath= line
while (my $line = <$cfgfh>) {
    # Drop the line terminator (including a stray CR) so it cannot end up in a value.
    $line =~ s/\r?\n\z//;

    if ($line =~ /(\d+):(.*)/) {
        # per-device line: <id>:<name>=<value>|<name>=<value>|...
        my $deviceid = $1;
        foreach my $param (split (/\|/, $2)) {
            # limit of 2 keeps any '=' that is part of the value
            my ($n, $v) = split (/=/, $param, 2);
            $miccfg{$deviceid}{$n} = $v;
        }
    } elsif ($line =~ /^miclist=(.*)/) {
        $miclist = $1;
    } elsif ($line =~ /^overlay=(.*)/) {
        $overlay = $1;
    } elsif ($line =~ /^imgpath=(.*)/) {
        $ospath = $1;
    }
}
close ($cfgfh);

# The mic list is later interpolated into commands as space separated words.
$miclist = '' unless defined $miclist;
$miclist =~ s/,/ /g;

# add the mount entry for mounting of root fs from master to /etc/fstab
# e.g. mount $master:/install/mpss3 /opt/intel/mic/mnt
#
# The "<master>:<path> <mountpoint>" text is passed to grep as ONE quoted,
# fixed-string pattern. Unquoted, the shell splits it and grep takes $micmnt
# (a directory) as a second file to search, so the exact-entry check fails and
# a duplicate line is appended to /etc/fstab on every run.
$cmd = "grep -F \"$master:$ospath $micmnt\" /etc/fstab";
($rc, $output) = runsyscmd ($cmd);
if ($rc) {
    # not found the exact mount entry
    $cmd = "grep $micmnt /etc/fstab";
    ($rc, $output) = runsyscmd ($cmd);
    if (!$rc) {
        # found the mount to $micmnt with another master or directory, remove the entry and umount it
        my $trans = $micmnt;
        $trans =~ s/\//\\\//g;    # escape '/' so the path can be used inside the sed address
        $cmd = "sed \"/$trans/d\" /etc/fstab > $tmppath/fstab.tmp";
        runsyscmd ($cmd, "Error: failed to configure fstab.\n", 6);
        # copy() returns false on failure; do not continue with a stale fstab
        unless (copy ("$tmppath/fstab.tmp", "/etc/fstab")) {
            outputmsg ("Error: failed to configure fstab.\n", 6);
        }
        $cmd = "umount -l -f $micmnt";
        runsyscmd ($cmd, "Error: failed to run umount -l -f $micmnt\n", 7);
    }

    # append the wanted entry and mount it
    $cmd = "echo \"$master:$ospath $micmnt nfs timeo=14,intr 1 2\" >>/etc/fstab";
    runsyscmd ($cmd);
    $cmd = "mount -a";
    runsyscmd ($cmd);
}

# make sure the remote files are accessable
# (the base file list is read through the mount point)
if (! -r "$micmnt/opt/intel/mic/filesystem/base.filelist") {
    outputmsg("Error: cannot access the $micmnt/opt/intel/mic/filesystem/base.filelist\n", 8);
}

# start to configure the mic
# stop the mpss service first
$cmd = "service mpss stop";
runsyscmd ($cmd, "Error: failed to stop mpss service.\n", 100);

# make sute the mpss has been stopped
# Poll the status up to 5 times, pausing 2 seconds after each unsuccessful
# check. The script carries on even if "mpss is stopped" is never reported.
my $i;
for ($i = 5; $i > 0; $i--) {
    $cmd = "service mpss status";
    ($rc, $output) = runsyscmd ($cmd);
    last if grep { /mpss is stopped/ } @$output;
    sleep 2;
}

# remove the mic configuration file
# (the default one plus, for every listed mic, its conf file, file list and
# file system directory)
unlink ("/etc/sysconfig/mic/default.conf");
for my $mic (split (/ /, $miclist)) {
    unlink ("/etc/sysconfig/mic/$mic.conf");
    unlink ("/opt/intel/mic/filesystem/$mic.filelist");
    rmtree ("/opt/intel/mic/filesystem/$mic");
}

# The next three micctrl calls are run in order; each entry is
# [ command, error message, exit code used when the command fails ].
my @micsteps = (
    # reset the configuration to default
    [ "micctrl --initdefaults $miclist",
      "Error: failed to initiate the mic devices.\n", 200 ],
    # configure the base dir
    [ "micctrl --basedir=/opt/intel/mic/mnt/opt/intel/mic/filesystem/base --list=/opt/intel/mic/mnt/opt/intel/mic/filesystem/base.filelist $miclist",
      "Error: failed to change the base dir for mic file system..\n", 101 ],
    # configure the commondir
    [ "micctrl --commondir=/opt/intel/mic/mnt/opt/intel/mic/filesystem/common --list=/opt/intel/mic/mnt/opt/intel/mic/filesystem/common.filelist $miclist",
      "Error: failed to change the common dir for mic file system.\n", 102 ],
);
for my $step (@micsteps) {
    my ($steperr, $stepcode);
    ($cmd, $steperr, $stepcode) = @$step;
    runsyscmd ($cmd, $steperr, $stepcode);
}

# configure the overlay
# (one micctrl call per comma separated overlay name)
my @ols = split (/,/, $overlay);
for my $ol (@ols) {
    $cmd = "micctrl --overlay=filelist --state=on --source=/opt/intel/mic/mnt/opt/intel/mic/filesystem/overlay/$ol --target=/opt/intel/mic/mnt/opt/intel/mic/filesystem/overlay/$ol.filelist $miclist";
    runsyscmd ($cmd, "Error: failed to change the overlay dir for mic file system.\n", 103);
}

# do the mic specific configuration
# The host's IPv4 addresses and routes are captured once; both listings are
# searched below for the bridge of every mic.
$cmd = "ip -4 addr show";
($rc, $output) = runsyscmd ($cmd);
$cmd = "ip -4 route show";
my ($rc2, $output2) = runsyscmd ($cmd);
foreach my $micid (keys %miccfg) {
    my $micname = $miccfg{$micid}{'name'};
    my $micconf = "/etc/sysconfig/mic/mic$micid.conf";   # live configuration of this mic
    my $tmpconf = "$tmppath/mic$micid.conf";             # scratch copy written by sed

    # set the boot device to be staticramfs so that the osimage don't need to generated for every boot
    $cmd = "micctrl --rootdev=StaticRamFS --target=/opt/intel/mic/filesystem/$micname.image mic$micid";
    runsyscmd ($cmd, "Error: failed to set root image for mic.\n", 104);

    # set the linux kernel location
    $cmd = "micctrl --osimage=$micmnt/lib/firmware/mic/uos.img mic$micid";
    runsyscmd ($cmd, "Error: failed to linux kernle location for mic.\n", 105);

    # set the autoboot
    # Only run micctrl when onboot is yes or no. Otherwise nothing is run;
    # previously the stale --osimage command was executed a second time.
    my $onboot = defined $miccfg{$micid}{'onboot'} ? $miccfg{$micid}{'onboot'} : '';
    my $autoboot;
    if ($onboot =~ /no/i) {
        $autoboot = 'no';
    } elsif ($onboot =~ /yes/i) {
        $autoboot = 'yes';
    }
    if (defined $autoboot) {
        $cmd = "micctrl --autoboot=$autoboot mic$micid";
        runsyscmd ($cmd, "Error: failed to set the autoboot for mic.\n", 106);
    }

    # set the hostname
    # sed writes the modified file to the scratch copy, which then replaces the
    # live file; a failed copy is reported instead of being ignored.
    $cmd = "sed \"s/Hostname .*/Hostname \"$micname\"/\" /etc/sysconfig/mic/mic$micid.conf > $tmppath/mic$micid.conf";
    runsyscmd ($cmd, "Error: failed to set hostname for mic.\n", 107);
    unless (copy ($tmpconf, $micconf)) {
        outputmsg ("Error: failed to set hostname for mic.\n", 107);
    }

    # configure the Verbose log
    if (defined $miccfg{$micid}{'vlog'} && $miccfg{$micid}{'vlog'} =~ /yes/i) {
        $cmd = "sed \"s/VerboseLogging .*/VerboseLogging \"Enabled\"/\" /etc/sysconfig/mic/mic$micid.conf > $tmppath/mic$micid.conf";
        runsyscmd ($cmd, "Error: failed to set Verbose log for mic.\n", 108);
        unless (copy ($tmpconf, $micconf)) {
            outputmsg ("Error: failed to set Verbose log for mic.\n", 108);
        }
    }

    # configure the bridge and ip for the mic
    # get the ip of the bridge
    # In the address listing the "<n>: <bridge>: ... mtu <n>" line is expected
    # before the bridge's "inet" line, so the scan stops at the inet line.
    # \Q..\E keeps characters such as '.' in the bridge name literal.
    my $brg = $miccfg{$micid}{br};
    my ($brip, $netbit, $brc, $mtu);
    foreach my $addrline (@$output) {
        if ($addrline =~ /inet\s+([\d\.]+)\/(\d+)\s+brd\s+([\d\.]+) scope global \Q$brg\E/) {
            $brip = $1;
            $netbit = $2;
            $brc = $3;
            last;
        } elsif ($addrline =~ /\d+:\s+\Q$brg\E:.*mtu\s+(\d+)/) {
            $mtu = $1;
        }
    }

    unless ($brip && $netbit && $brc) {
        outputmsg("Error: failed to get ip for the bridge $brg.\n", 110);
    }

    # add the bridge to mic configuration
    # since the micctrl --addbridge=<brgname> --type=external --ip=<ip of brg> --netbits=8 does not
    # work with '--netbis=8', the bridge configuration has been done by changing cfg file directly.
    $cmd = "echo \"Bridge $brg External $brip $netbit $mtu\" >> /etc/sysconfig/mic/default.conf";
    runsyscmd ($cmd);

    # configre network for each mic
    $cmd = "micctrl --network=static --bridge=".$brg." --ip=".$miccfg{$micid}{ip}." mic$micid";
    runsyscmd ($cmd, "Error: failed to generate IP configuration for mic.\n", 104);

    # take the configuration to effect
    $cmd = "micctrl --resetconfig mic$micid";
    runsyscmd ($cmd, "Error: failed to spread the configuration.\n", 201);

    # get the gateway of the bridge
    # Build the netmask from the prefix length, derive the bridge's network
    # address, then take the "src" address of the matching route line.
    my $netmask = (2**$netbit - 1) << (32 - $netbit);
    my $brnet = unpack("N", inet_aton($brip));
    $brnet &= $netmask;
    $brnet = inet_ntoa(pack("N", $brnet));
    my $brgw;
    foreach my $routeline (@$output2) {
        if ($routeline =~ /\Q$brnet\E\/$netbit\s+dev\s+\Q$brg\E.*src\s+([\d\.]+)/) {
            $brgw = $1;
            last;
        }
    }

    # configure the mic interface to the real interface cfg file instead of mic cfg file
    # this also because the defect of intel tool so that we have to do it separated
    my @ifcfg = ("IPADDR=$miccfg{$micid}{ip}\n", "GATEWAY=$brgw\n", "PREFIX=$netbit\n");
    my $ifcfgfh;
    unless (open ($ifcfgfh, '>', "/opt/intel/mic/filesystem/mic$micid/etc/sysconfig/network/ifcfg-mic0")) {
        # outputmsg() exits with code 111, so the print below is never reached on failure
        outputmsg ("Error: cannot open ifcfg-mic$micid\n", 111);
    }
    print $ifcfgfh @ifcfg;
    close ($ifcfgfh);

    # copy the system files which generated by genimage to the micdir
    # e.g. /etc/hosts /etc/passwd ...
    my $src = "$micmnt/opt/intel/mic/filesystem/overlay/system/*";
    my $dst = "/opt/intel/mic/filesystem/mic$micid";
    $cmd = "/bin/cp -rf $src $dst";
    runsyscmd ($cmd, "Error: failed to copy the overlay dir.\n", 300);

    # generate the static root file system in ramdisk format
    $cmd = "micctrl --updateramfs mic$micid";
    runsyscmd ($cmd, "Error: failed to generate the static ramfs.\n", 301);
}

# start the mpss service after the configuration
# (run through system() with its output discarded, not through runsyscmd())
$cmd = "service mpss start >/dev/null 2>&1";
system($cmd);

# Poll the status up to 5 times, pausing 2 seconds after each check that does
# not report "mpss is running".
# NOTE(review): because an error code is passed to runsyscmd(), a non-zero
# exit status of the status command ends the script with code 100 before any
# retry happens - confirm that this is intended.
for ($i = 5; $i > 0; $i--) {
    $cmd = "service mpss status";
    ($rc, $output) = runsyscmd ($cmd, "Error: failed to get the status of mpss.\n", 100);
    last if grep { /mpss is running/ } @$output;
    sleep 2;
}

# notice nodeset command, the configuratoin has been done
# (one MICMSG line per configured mic, written to stdout and to the log)
outputmsg ("MICMSG:$_->{'name'}: Done\n") for values %miccfg;

print LOG "mpss has been started\n";
print LOG "\nFinish the mic configuratoin: ".`date`."====================================================\n";

close (LOG);

# since the start of mpss service created a new process and for some reason the process caused the hang of
# sshd root@notty on the host (that means the xdsh cannot get connection closed from host), a work around
# is to kill the sshd connection by force
$cmd = q{ps -ef | grep sshd | grep notty |awk -F' ' '{print $2}'};
($rc, $output) = runsyscmd ($cmd);
# every output line is a process id; send SIGTERM (15) to each of them
kill 15, $_ foreach @$output;

exit 0;

# run command
#
# Runs a command through backticks with stderr merged into stdout and writes
# the command and its output to the LOG filehandle.
#
# Arguments:
#   $cmd    - the command line to run (" 2>&1" is appended unless it already ends with it)
#   $errmsg - optional message reported through outputmsg() when the command fails
#   $rc     - optional exit code; when true and the command fails, the script exits with it
#
# Returns: ($errcode, \@output)
#   $errcode - 0 on success, otherwise non-zero: the command's exit status,
#              128 + signal number if it was killed by a signal, or 255 if it
#              could not be started at all
#   \@output - reference to the list of output lines (line endings kept)
sub runsyscmd {
    my $cmd = shift;
    my $errmsg = shift;
    my $rc = shift;

    print LOG "---------------------------------------------\n";
    print LOG "Run command: $cmd\n";

    if (!($cmd =~ /2>&1$/)) { $cmd .= ' 2>&1'; }
    my @output = `$cmd`;
    # Save the wait status right away, before anything else can overwrite $?.
    my $status = $?;

    my $errcode = 0;
    if ($status == -1) {
        # the command could not be started
        $errcode = 255;
    } elsif ($status & 127) {
        # Killed by a signal: the high byte is 0 in this case, so "$? >> 8"
        # alone would report success.
        $errcode = 128 + ($status & 127);
    } else {
        $errcode = $status >> 8;
    }

    foreach my $line (@output) {
        print LOG $line;
    }
    print LOG "---------------------------------------------\n";

    if ($rc && $errcode) {
        # outputmsg() exits for a true $rc; the explicit exit is kept as a safeguard
        outputmsg($errmsg, $rc);
        exit $rc;
    }
    return ($errcode, \@output);
}

# display the output message
#
# Writes $msg to the LOG filehandle and to the currently selected output
# handle. When a true exit code $rc is given, the script exits with it;
# otherwise the sub returns to the caller.
sub outputmsg{
    my ($msg, $rc) = @_;

    print LOG $msg;
    print $msg;

    exit $rc if $rc;
}