From 29457d1177221cc68390904ca86823968678411b Mon Sep 17 00:00:00 2001 From: Bruce Potter Date: Sat, 24 May 2014 05:46:14 -0400 Subject: [PATCH] softlayer install and sysclone working for strangely ordered nics and cross vlan --- xCAT-SoftLayer/bin/khrem | 87 +++++++++++++++++++ xCAT-SoftLayer/bin/modifygrub | 64 +++++++++++--- xCAT-SoftLayer/bin/pushinitrd | 22 +++-- xCAT-SoftLayer/postscripts/configbond | 13 +-- .../sles/compute.sles11.softlayer.tmpl | 7 ++ .../si-post-install/16all.updatenetwork | 38 ++++++++ .../si-post-install/20all.mkinitrd_for_suse11 | 11 +++ xCAT-genesis-scripts/bin/doxcat | 13 ++- 8 files changed, 224 insertions(+), 31 deletions(-) create mode 100755 xCAT-SoftLayer/bin/khrem create mode 100755 xCAT-SoftLayer/si-post-install/20all.mkinitrd_for_suse11 diff --git a/xCAT-SoftLayer/bin/khrem b/xCAT-SoftLayer/bin/khrem new file mode 100755 index 000000000..7f5f25d1b --- /dev/null +++ b/xCAT-SoftLayer/bin/khrem @@ -0,0 +1,87 @@ +#!/usr/bin/perl + +# remove entries from the .ssh/known_hosts file for a node + +use strict; +use Getopt::Long; +use Data::Dumper; +#$Data::Dumper::Maxdepth=2; + +# Globals - these are set once and then only read. 
+my $HELP; +my $VERBOSE; +my $file = '~/.ssh/known_hosts'; # the ~ is expanded by the shell that runs the sed cmd below + +my $usage = sub { + my $exitcode = shift @_; + print "Usage: khrem [-?|-h|--help] [-v|--verbose] <node>\n"; + exit $exitcode; +}; + +# Process the cmd line args +Getopt::Long::Configure("bundling"); +#Getopt::Long::Configure("pass_through"); +Getopt::Long::Configure("no_pass_through"); +if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE)) { $usage->(1); } + +if ($HELP) { $usage->(0); } +if (scalar(@ARGV)!=1) { $usage->(1); } +my $node = $ARGV[0]; # the node whose known_hosts entries will be removed + +my @output = runcmd("host $node"); +my $hostname; +my $line = shift @output; +#print "line=$line\n"; +if ($line =~ m/is an alias for /) { + ($hostname) = $line =~ m/is an alias for ([^\.]+)/; + #print "hostname=$hostname\n"; + $line = shift @output; + } +# without an ip the sed script below would contain /^/d and delete every line of the file, so stop here +my ($ip) = $line =~ m/has address (.+)$/ or die "Error: could not determine the IPv4 address of $node from the output of the host cmd.\n"; +if (defined($hostname)) { + print "Removing entries from $file for: $node, $hostname, $ip\n"; + runcmd("sed -i '/^$node/d;/^$hostname/d;/^$ip/d' $file"); + } +else { + print "Removing entries from $file for: $node, $ip\n"; + runcmd("sed -i '/^$node/d;/^$ip/d' $file"); +} + +exit(0); + + +# Print msg only if -v was specified +sub verbose { if ($VERBOSE) { print shift, "\n"; } } + + + + +# Run a command. If called in list context, it will capture the output +# of the cmd and return it. Otherwise, it will display the output to stdout. +# If the cmd has a non-zero rc, this function will die with a msg.
+sub runcmd +{ + my ($cmd) = @_; + my $rc; + + $cmd .= ' 2>&1' ; + verbose($cmd); + + my @output; + if (wantarray) { + @output = `$cmd`; + $rc = $?; + } + else { + system($cmd); + $rc = $?; + } + + if ($rc) { + $rc = ($rc == -1) ? 0 : $rc >> 8; # -1 means the cmd could not be started; shifting it would yield a bogus huge rc + if ($rc > 0) { die "Error: rc $rc return from cmd: $cmd\n"; } + else { die "Error: system error returned from cmd: $cmd\n"; } + } + elsif (wantarray) { return @output; } +} diff --git a/xCAT-SoftLayer/bin/modifygrub b/xCAT-SoftLayer/bin/modifygrub index d0ee65475..906699a6b 100755 --- a/xCAT-SoftLayer/bin/modifygrub +++ b/xCAT-SoftLayer/bin/modifygrub @@ -12,13 +12,14 @@ use Socket; # Globals - these are set once and then only read. my $HELP; my $VERBOSE; +my $DRYRUN; my $WAITTIME; my $PROVMETHOD; my $XCATNETBOOTTITLE = 'xCAT network boot kernel and initrd'; my $usage = sub { my $exitcode = shift @_; - print "Usage: modifygrub [-?|-h|--help] [-v|--verbose] [-w ] [-p \n\n"; + print "Usage: modifygrub [-?|-h|--help] [-v|--verbose] [--dryrun] [-w ] [-p \n\n"; if (!$exitcode) { print "Modify the grub config file on the node to boot the specified kernel and initrd.\n"; } @@ -31,7 +32,7 @@ if (-f '/etc/os-release') { die "This script doesn't support ubuntu yet.\n"; } Getopt::Long::Configure("bundling"); #Getopt::Long::Configure("pass_through"); Getopt::Long::Configure("no_pass_through"); -if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE, 'w|waittime=s' => \$WAITTIME, 'p|provmethod=s' => \$PROVMETHOD)) { $usage->(1); } +if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE, 'dryrun' => \$DRYRUN, 'w|waittime=s' => \$WAITTIME, 'p|provmethod=s' => \$PROVMETHOD)) { $usage->(1); } if ($HELP) { $usage->(0); } if (scalar(@ARGV) != 4) { $usage->(1); } @@ -71,13 +72,15 @@ sub addKernelParms { $bootif = "BOOTIF=01-$bootif"; } + #todo: if you are running genesis shell (nodeset shell), this if-else will depend on the nodeset done before that.
+ # really should check for currstate=shell, or something like that if (defined($PROVMETHOD) && $PROVMETHOD eq 'sysclone') { # add additional parms for sysclone # DEVICE=eth0 IPADDR=10.0.0.99 NETMASK=255.255.255.0 NETWORK=10.0.0.0 BROADCAST=10.0.0.255 GATEWAY=10.0.0.1 GATEWAYDEV=eth0 #todo: should we also add ETHER_SLEEP=$WAITTIME textmode=1 dns=$mnip ? $args->{kernelparms} .= " $bootif IPADDR=$ip NETMASK=$netmask NETWORK=$network BROADCAST=$broadcast GATEWAY=$gateway HOSTNAME=$nodename DEVICE=$nic GATEWAYDEV=$nic"; } - else { # scripted install or genesis shell + else { # scripted install $args->{kernelparms} .= " $bootif hostip=$ip netmask=$netmask gateway=$gateway dns=$mnip hostname=$nodename netdevice=$nic netwait=$WAITTIME textmode=1"; } } @@ -89,7 +92,8 @@ sub getNodeIpInfo { my ($ipprefix) = $args->{mnip}=~m/^(\d+)\./; #todo: this is a hack, just using the 1st octet of the mn ip addr verbose("using IP prefix $ipprefix"); - # parse ip addr show output, looking for ipprefix, to determine nic and ip + # parse ip addr show output, looking for ipprefix, to determine nic, ip, mac + #todo: is there a way to find the actual/individual mac of the nic? When 2 nics are bonded, they both display the same mac. my @output = runcmd("ip addr show"); my ($nic, $mac, $ipandmask); foreach my $line (@output) { @@ -98,22 +102,49 @@ sub getNodeIpInfo { if (($mactmp) = $line=~m|^\s+link/ether\s+(\S+) |) { $mac = $mactmp; } # got mac, remember it if (($iptmp) = $line=~m/^\s+inet\s+($ipprefix\S+) /) { $ipandmask = $iptmp; last; } # got ip, we are done } + if (!defined($ipandmask)) { die "Error: can't find a NIC with a prefix $ipprefix that communicates with".$args->{mnip}.".\n"; } my ($ip, $netmask, $network, $broadcast) = convertIpAndMask($ipandmask); - # if the nic is a bonded nic (common on sl), then find the 1st real nic that is part of it - my $realnic = $nic; + # if the nic is a bonded nic (common on sl), then find the 1st real nic that is up that is part of it. 
+ # also find that real nics real mac + my $realnic; if ($nic =~ /^bond/) { my @nics = grep(m/\s+master\s+$nic\s+/, @output); if (!scalar(@nics)) { die "Error: can't find the NICs that are part of $nic.\n"; } - ($realnic) = $nics[0]=~m/^\d+:\s+(\S+): /; - # do not need to go back thru the ip addr show output and find the mac of this nic because the mac - # of the bond nic is the same. Plus the code below does not work right for some reason anyway. - #foreach my $line (@output) { - # my ($nictmp, $mactmp, $foundnic); - # if (($nictmp) = $line=~m/^\d+:\s+(\S+): / && $nictmp eq $realnic) { $foundnic = 1; } - # if (($mactmp) = $line=~m|^\s+link/ether\s+(\S+) | && $foundnic) { $mac = $mactmp; last; } # got mac, we are done - #} + foreach my $line (@nics) { + my ($nictmp, $state) = $line=~m/^\d+:\s+(\S+): .* state\s+(\S+)/; + if (defined($nictmp) && defined($state) && $state eq 'UP') { $realnic = $nictmp; last; } # got ip, we are done + } + if (!defined($realnic)) { die "Error: can't find a physical NIC that is up and part of $nic.\n"; } + + # now get the real mac of this real nic (when 2 nics are bonded, ip addr show displays one of the nics + # macs for both nics and the bond). So we have to depend on /proc/net/bonding/$bond instead. + my @bondout = runcmd("cat /proc/net/bonding/$nic"); + my $foundnic; + foreach my $line (@bondout) { + my $mactmp; + if ($line=~m/^Slave Interface:\s+$realnic/) { $foundnic = 1; } # found the stanza for this nic, remember it + if ($foundnic && (($mactmp) = $line=~m/^Permanent HW addr:\s+(\S+)/)) { $mac = $mactmp; last; } + } } + else { $realnic = $nic; } + + # centos/redhat seems to name the nic in a different order than sles on some svrs. + # sles seems to name them in the same order as 'ip addr show' displays them, centos does not. 
+ # so if we are on centos right now, we need to count down to determine the number that sles + # will give the nic that we have selected, because it is the sles naming that we care about, + # because that is the initrd that will be running in the scripted install case. + # For the sysclone case, genesis doxcat should be changed to use the mac to find the nic. + if (isRedhat()) { + my @nics = grep(m/^\d+:\s+eth/, @output); + my $i = 0; + foreach my $line (@nics) { + my ($nictmp) = $line=~m/^\d+:\s+(\S+):/; + if (defined($nictmp) && $nictmp eq $realnic) { $realnic = "eth$i"; last; } # got ip, we are done + $i++; + } + } + print "Determined that SLES will call the install NIC $realnic (it has mac $mac)\n"; # finally, find the gateway my $gateway; @@ -198,6 +229,11 @@ sub updateGrub { "\tkernel " . $fileprefix . $args->{kernelpath} . ' ' . $args->{kernelparms} . "\n", "\tinitrd " . $fileprefix . $args->{initrdpath} . "\n", ); + if ($DRYRUN) { + print "Dry run: would add this stanza to $grubfile:\n"; + foreach my $l (@entry) { print $l; } + return; + } my $needtowritefile = 1; if (grep(/^title\s+$XCATNETBOOTTITLE/, @lines)) { $needtowritefile = updateGrubEntry(\@lines, \@entry); } # there is already an entry in there diff --git a/xCAT-SoftLayer/bin/pushinitrd b/xCAT-SoftLayer/bin/pushinitrd index 2485d186c..62c19d8ba 100755 --- a/xCAT-SoftLayer/bin/pushinitrd +++ b/xCAT-SoftLayer/bin/pushinitrd @@ -13,12 +13,13 @@ use Data::Dumper; # Globals - these are set once and then only read. my $HELP; my $VERBOSE; +my $DRYRUN; my $WAITTIME; my $NOAUTOINST; my $usage = sub { my $exitcode = shift @_; - print "Usage: pushinitrd [-?|-h|--help] [-v|--verbose] [-w ] \n\n"; + print "Usage: pushinitrd [-?|-h|--help] [-v|--verbose] [--dryrun] [-w ] \n\n"; if (!$exitcode) { print "Copy the initrd, kernel, params, and static IP info to nodes, so they can net install\n"; print "even across vlans (w/o setting up pxe/dhcp broadcast relay). 
This assumes a working\n"; @@ -32,7 +33,7 @@ my $usage = sub { Getopt::Long::Configure("bundling"); #Getopt::Long::Configure("pass_through"); Getopt::Long::Configure("no_pass_through"); -if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE, 'w|waittime=s' => \$WAITTIME, 'a|noautoinst' => \$NOAUTOINST)) { $usage->(1); } +if (!GetOptions('h|?|help' => \$HELP, 'v|verbose' => \$VERBOSE, 'dryrun' => \$DRYRUN, 'w|waittime=s' => \$WAITTIME, 'a|noautoinst' => \$NOAUTOINST)) { $usage->(1); } if ($HELP) { $usage->(0); } if (scalar(@ARGV) != 1) { $usage->(1); } @@ -45,6 +46,8 @@ copyFilesToNodes($noderange, \%bootparms); updateGrubOnNodes($noderange, \%bootparms); +if ($DRYRUN) { exit(0); } + if ($bootparms{osimageprovmethod} eq 'install' && !$NOAUTOINST) { modifyAutoinstFiles($noderange, \%bootparms); } if ($bootparms{osimageprovmethod} eq 'sysclone') { copySyscloneFiles(); } @@ -66,6 +69,8 @@ sub getBootParms { # for now just pick the 1st one. They should all be the same, except for the node name in kcmdline chomp($gresults[0]); $gresults[0] =~ s/^\S+:\s+$attr:\s*//; + #print "gresults='$gresults[0]'\n"; + if ($gresults[0] !~ m/\S/) { die "Error: attribute $attr not defined for the noderange. Did you run 'nodeset osimage=' ?\n"; } $bootparms{$a} = $gresults[0]; } $bootparms{kcmdline} =~ s|/install/autoinst/\S+|/install/autoinst/|; @@ -73,6 +78,7 @@ sub getBootParms { # from the nodes provmethod, get the osimage provmethod, so we know the type of install @output = runcmd("lsdef -t osimage $bootparms{provmethod} -ci provmethod"); chomp($output[0]); + if ($output[0] =~ m/^Could not find/) { die "Error: provmethod $bootparms{provmethod} is set for the node, but there is no osimage definition by that name."; } my ($junk, $provmethod) = split(/=/, $output[0]); $bootparms{osimageprovmethod} = $provmethod; @@ -97,8 +103,13 @@ sub copyFilesToNodes { my $localfile = "/tftpboot/$file"; # for the my $remotefile = '/boot/' . 
remoteFilename($file); - print "Copying $localfile to $nr:$remotefile\n"; - runcmd("xdcp $nr -p $localfile $remotefile"); + if ($DRYRUN) { + print "Dry run: would copy $localfile to $nr:$remotefile\n"; + } + else { + print "Copying $localfile to $nr:$remotefile\n"; + runcmd("xdcp $nr -p $localfile $remotefile"); + } } } @@ -117,10 +128,11 @@ sub updateGrubOnNodes { my $nr = shift @_; my $bootparms = shift @_; my $vtxt = ($VERBOSE ? '-v' : ''); + my $dtxt = ($DRYRUN ? '--dryrun' : ''); my @output = runcmd('which modifygrub'); my $modifygrub = $output[0]; chomp($modifygrub); - my $cmd = "xdsh $nr -e $modifygrub $vtxt -w $WAITTIME -p " . $bootparms->{osimageprovmethod} . ' ' . remoteFilename($bootparms->{kernel}) . ' ' . remoteFilename($bootparms->{initrd}) . ' '; + my $cmd = "xdsh $nr -e $modifygrub $vtxt $dtxt -w $WAITTIME -p " . $bootparms->{osimageprovmethod} . ' ' . remoteFilename($bootparms->{kernel}) . ' ' . remoteFilename($bootparms->{initrd}) . ' '; # we need to quote the kernel parms, both here when passing it to xdsh, and on the node # when xdsh is passing it to modifygrub. The way to get single quotes inside single quotes # is to quote each of the outer single quotes with double quotes. diff --git a/xCAT-SoftLayer/postscripts/configbond b/xCAT-SoftLayer/postscripts/configbond index 0cd4e033d..93162dc7a 100755 --- a/xCAT-SoftLayer/postscripts/configbond +++ b/xCAT-SoftLayer/postscripts/configbond @@ -4,26 +4,29 @@ # Usage: configbond bond1 eth1 [eth3] # # Note: this postscript currently has some assumptions that are specific to the softlayer environment. -# It is only used to configure bond1, because bond0 gets configured by the node provisioning process. +# We only use this to configure bond1, because bond0 gets configured by the node provisioning process. 
+# (altho this script would work for bond0) use strict; # Check number of args my $nargs = $#ARGV + 1; if (scalar(@ARGV) < 2 || scalar(@ARGV) > 3) { - system("logger -t xcat -p local4.err 'Usage: configbond bond dev0 [dev1]'"); + system("logger -t xcat -p local4.err 'Usage: configbond []'"); exit 1; } my $bond = shift(@ARGV); my $nic = $ARGV[0]; -my @devs = (); -push(@devs,$ARGV[0]); -if (defined($ARGV[1])) { push(@devs,$ARGV[1]); } +my @devs; +push(@devs,@ARGV); my $nicips = $ENV{NICIPS}; my $nicnetworks = $ENV{NICNETWORKS}; my $net_cnt = $ENV{NETWORKS_LINES}; +#todo: change this script so they dont need to specify nicnetworks +if (!$nicips || !$nicnetworks) { system("logger -t xcat -p local4.err 'configbond: must specify attributes nicips and nicnetworks in the xcat db for this node.'"); exit 1; } + #todo: these are specific to softlayer. They should be another attribute or argument my $bondingopts = 'mode=4 miimon=100 downdelay=0 updelay=0 lacp_rate=fast xmit_hash_policy=1'; diff --git a/xCAT-SoftLayer/share/xcat/install/sles/compute.sles11.softlayer.tmpl b/xCAT-SoftLayer/share/xcat/install/sles/compute.sles11.softlayer.tmpl index e0ac43fdd..c95392007 100644 --- a/xCAT-SoftLayer/share/xcat/install/sles/compute.sles11.softlayer.tmpl +++ b/xCAT-SoftLayer/share/xcat/install/sles/compute.sles11.softlayer.tmpl @@ -82,6 +82,7 @@ yes mode=4 miimon=100 downdelay=0 updelay=0 lacp_rate=fast xmit_hash_policy=1 eth0 + eth2 bond0 static auto @@ -95,6 +96,12 @@ Ethernet Card 0 off + + none + eth2 + Ethernet Card 2 + off + false diff --git a/xCAT-SoftLayer/si-post-install/16all.updatenetwork b/xCAT-SoftLayer/si-post-install/16all.updatenetwork index db6830859..e199dc4c5 100755 --- a/xCAT-SoftLayer/si-post-install/16all.updatenetwork +++ b/xCAT-SoftLayer/si-post-install/16all.updatenetwork @@ -39,6 +39,9 @@ fi hostname $HOSTNAME bond=bond0 +if [[ $DEVICE == "eth0" ]]; then + DEVICE2=eth2 +fi device_names=`ifconfig -a | grep -i hwaddr | grep -i 'Ethernet' | grep -v usb| awk
'{print $1}'` str_cfg_file='' @@ -83,9 +86,23 @@ if [ -d "/etc/sysconfig/network-scripts/" ];then echo "SLAVE=yes" >> $str_cfg_file echo "USERCTL=no" >> $str_cfg_file + if [[ $DEVICE2 != "" ]]; then + # write ifcfg-eth0 + i="$DEVICE2" + str_cfg_file="$dir/ifcfg-$i" + echo "DEVICE=$i" > $str_cfg_file + echo "BOOTPROTO=none" >> $str_cfg_file + echo "MASTER=$bond" >> $str_cfg_file + echo "ONBOOT=yes" >> $str_cfg_file + echo "SLAVE=yes" >> $str_cfg_file + echo "USERCTL=no" >> $str_cfg_file + fi + # write modprobe alias config str_cfg_file="/etc/modprobe.d/$bond.conf" echo "alias $bond bonding" > $str_cfg_file + + #todo: figure out how to set the default gateway in rhel else # use dhcp for all nics for i in $device_names;do @@ -122,6 +139,9 @@ elif [ -d "/etc/sysconfig/network/" ];then echo "BROADCAST=$BROADCAST" >> $str_cfg_file echo "USERCONTROL=no" >> $str_cfg_file echo "BONDING_SLAVE_0=$DEVICE" >> $str_cfg_file + if [[ $DEVICE2 != "" ]]; then + echo "BONDING_SLAVE_1=$DEVICE2" >> $str_cfg_file + fi # write ifcfg-eth0 i="$DEVICE" @@ -129,10 +149,28 @@ elif [ -d "/etc/sysconfig/network/" ];then echo "BOOTPROTO=none" > $str_cfg_file echo "STARTMODE=hotplug" >> $str_cfg_file + if [[ $DEVICE2 != "" ]]; then + # write ifcfg-eth2 + i="$DEVICE2" + str_cfg_file="$dir/ifcfg-$i" + echo "BOOTPROTO=none" > $str_cfg_file + echo "STARTMODE=hotplug" >> $str_cfg_file + fi + # write modprobe alias config str_cfg_file="/etc/modprobe.d/$bond.conf" echo "alias $bond bonding" > $str_cfg_file + # set the default gateway (at this point this is the private nic gateway, to handle provision across vlans) + file=/etc/sysconfig/network/routes + if grep -q -E '^default ' $file; then + # replace the default route that is already in there + sed -i 's/^default .*$/default '$GATEWAY' - -/' $file + else + # no default route yet, append to file + echo "default $GATEWAY - -" >>$file + fi + # this was the original config of the eth0 nic (without bonding) #echo "DEVICE=$i" > $str_cfg_file #echo 
"BOOTPROTO=static" >> $str_cfg_file diff --git a/xCAT-SoftLayer/si-post-install/20all.mkinitrd_for_suse11 b/xCAT-SoftLayer/si-post-install/20all.mkinitrd_for_suse11 new file mode 100755 index 000000000..c18e454f8 --- /dev/null +++ b/xCAT-SoftLayer/si-post-install/20all.mkinitrd_for_suse11 @@ -0,0 +1,11 @@ +#!/bin/bash + +# This SI post-install script is needed because the initrd that autoyast builds when installing +# sles on the golden node may not have the drivers when that initrd runs on the node that is +# being deployed with this image (specifically, drivers to be able to mount the disk). +# So rebuild the initrd on the to-node after putting the image on the disk, but before rebooting. + +#todo: this same issue could occur on other distros too. Make this script work on red hat by +# checking for dracut and using that if it exists. + +mkinitrd diff --git a/xCAT-genesis-scripts/bin/doxcat b/xCAT-genesis-scripts/bin/doxcat index 7e8a0a2f7..7e84e097c 100755 --- a/xCAT-genesis-scripts/bin/doxcat +++ b/xCAT-genesis-scripts/bin/doxcat @@ -126,18 +126,17 @@ for parm in `cat /proc/cmdline`; do netmask=$value elif [[ ${key,,} == "gateway" ]]; then gateway=$value - elif [[ ${key,,} == "netdevice" || ${key,,} == "device" ]]; then - netdevice=$value fi done -if [[ -n $hostip && -n $netmask && -n $gateway && -n $netdevice ]]; then +if [[ -n $hostip && -n $netmask && -n $gateway && -n $bootnic ]]; then # doing static ip + # the device was determined above from the bootif mac, and put in bootnic numbits=$(mask2prefix $netmask) broadcast=$(bcastcalc $hostip $netmask) - echo "Setting static IP=$hostip/$numbits broadcast=$broadcast gateway=$gateway netdevice=$netdevice ..." - ip addr add $hostip/$numbits broadcast $broadcast dev $netdevice scope global label $netdevice - ip link set $netdevice up - ip route replace to default via $gateway dev $netdevice + echo "Setting static IP=$hostip/$numbits broadcast=$broadcast gateway=$gateway device=$bootnic BOOTIF=$BOOTIF ..." 
+ ip addr add $hostip/$numbits broadcast $broadcast dev $bootnic scope global label $bootnic + ip link set $bootnic up + ip route replace to default via $gateway dev $bootnic # in softlayer it takes up to 60 seconds for the nic to actually be able to communicate echo -n Waiting to reach xCAT mgmt node $gateway. xcatretries=60