394 lines
13 KiB
Plaintext
Raw Normal View History

#!/usr/bin/perl
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
# Run the asu64 utility out of band to multiple nodes, either sequentially or in parallel
BEGIN
{
$::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr';
}
use strict;
close(STDIN);
open(STDIN,"<","/dev/null");
use lib "$::XCATROOT/lib/perl";
use IO::Socket::SSL;
use XML::Simple;
$XML::Simple::PREFERRED_PARSER='XML::Parser';
use Data::Dumper;
use IO::Handle;
use IO::Select;
use xCAT::Utils;
use Getopt::Long;
use POSIX qw(:signal_h :errno_h :sys_wait_h);
use Thread qw(yield);
$::asucmd = '/opt/ibm/toolscenter/asu/asu64';
my $interface;
my $username;
my $passwd;
my $fanout;
my $batchfile;
my $help;
Getopt::Long::Configure("require_order");
Getopt::Long::Configure("bundling");
Getopt::Long::Configure("no_pass_through");
if (!GetOptions(
"i|interface=s" => \$interface,
"p|passwd=s" => \$passwd,
'l|loginname=s' => \$username,
'f|fanout=i' => \$fanout,
'b|batch=s' => \$batchfile,
'r|retry' => \$::RETRY,
'd|donotfilter' => \$::DONOTFILTER,
"n|nonodecheck" => \$::NONODECHECK, #does not check the noderange, in this case, noderange need to be a simple list of nodes.
"V|verbose" => \$::VERBOSE,
'h|help' => \$help,
) || $help || ($batchfile && scalar(@ARGV)!=1) || (!$batchfile && scalar(@ARGV)<2) ) {
print "Usage: pasu [-V] [-d] [-n] [-i <hostname-suffix>] [-l <user>] [-p <passwd>] [-f <fanout>] <noderange> <command>\n";
print " pasu [-V] [-d] [-n] [-i <hostname-suffix>] [-l <user>] [-p <passwd>] [-f <fanout>] -b <batchfile> <noderange>\n";
exit;
}
my %nodehdl;
my $xcathost='localhost:3001';
my $pasumaxp = 64;
if ($ENV{XCATHOST}) {
$xcathost=$ENV{XCATHOST};
}
if ($ENV{XCATPSHFANOUT}) {
$pasumaxp=$ENV{XCATPSHFANOUT};
}
if ($fanout) { # see if they overroad the fanout from the command line
$pasumaxp=$fanout;
}
my $noderange = shift @ARGV;
my @nodes=();
#print "fanout=$fanout, username=$username, noderange=$noderange\n";
my $nodeattrs;
if ($::NONODECHECK) {
@nodes=split(/,/, $noderange);
}
else { # contact xcatd to expand noderange and get ipmi attrs
# do not need to do this call, because getting the ipmi atts will also give us a list of nodes
#@nodes = expandnoderange($noderange);
# this is reference to a hash, each key is the nodename and the value is a reference to a hash of attr values
$nodeattrs = getipmiattrs($noderange);
#print Dumper($nodeattrs);
#foreach my $k (keys(%$nodeattrs)) {
# print "$k:\n";
# my $subhash = $nodeattrs->{$k};
# foreach my $k2 (keys(%$subhash)) { print " $k2=", $subhash->{$k2}, "\n"; }
#}
#exit;
@nodes = keys(%$nodeattrs);
}
my ($defaultuser, $defaultpw);
if (!defined($username) || !defined($passwd)) {
($defaultuser, $defaultpw) = getdefaultcredentials();
#if (!defined($username)) { $username = $user; }
#if (!defined($passwd)) { $passwd = $pw; }
if ($::VERBOSE) { print "default username=$defaultuser, passwd=$defaultpw\n"; }
}
my $children = 0;
my $inputs = new IO::Select;
my %pids; # pid => node
my %exitcodes; # Keep a list of children with known exit codes
my %foundcodes;
my @retries; # the nodes that fail with connection error
if ($interface) {
foreach (@nodes) {
s/$/-$interface/;
}
}
# Fork the processes for running asu for each node
#todo: i thought we would need retry logic because i thought asu had problems running
# in parallel. So far (up to 24 nodes) have not had any problem, but keeping the
# logic in just in case.
@retries = @nodes;
while (scalar(@retries)) {
@nodes = @retries;
@retries = ();
foreach (@nodes) {
my $node=$_;
my $ipmiattrs = $nodeattrs->{$node};
my $bmc = $ipmiattrs->{bmc};
if (!defined($bmc)) {
print "$node: the ipmi.bmc attribute is not defined, skipping.\n";
next;
}
# if we have already forked the max # of simultaneous processes, wait till 1 finishes
while ($children > $pasumaxp) { processoutput($inputs); }
# fork anothe process
my $child;
$children++;
# precedence on the username and password is: cli option, ipmi table, passwd table
my ($user, $pw);
if (defined($username)) { $user = $username; } # cli option
elsif (defined($ipmiattrs->{username})) { $user = $ipmiattrs->{username}; }
else { $user = $defaultuser; }
if (defined($passwd)) { $pw = $passwd; } # cli option
elsif (defined($ipmiattrs->{password})) { $pw = $ipmiattrs->{password}; }
else { $pw = $defaultpw; }
if ($::VERBOSE) { print "For node $node using bmc=$bmc, user=$user, pw=$pw\n"; }
#asunode(\$child,$node,$username,$passwd,@ARGV[1 .. $#ARGV]);
asunode(\$child,$node,$bmc,$user,$pw,$batchfile,@ARGV); # child is the fd of the child process
$inputs->add($child);
$nodehdl{$child} = $node;
}
# quiesce everything
while ($inputs->count) {
processoutput($inputs);
}
while (processoutput($inputs)) {};
while (wait != -1) {
yield;
}
}
my $exitcode=0;
foreach (values %pids) {
my $possible_codes = join ",",keys %foundcodes;
unless (defined $exitcodes{$_}) {
print stderr "$_: *** pasu missed exit code, probably one of the following: $possible_codes\n";
}
}
foreach (keys %exitcodes) {
if ($exitcodes{$_}) {
print stderr "$_: *** asu exited with error code ".$exitcodes{$_}.".\n";
$exitcode++;
}
}
if ($exitcode) { #Exit code reflects number of failed nodes
$exitcode=$exitcode%256; #keep from overflowing valid values
unless ($exitcode) { #if number of failed nodes happened to be evenly divisible by 256, make it non-zero again
$exitcode++;
}
}
exit($exitcode);
# Process output on the select stmt from the forked cmds
sub processoutput { #This way, one arbiter handles output, no interrupting
my $inputs = shift; # the select object that contains all the file descriptors
my @readyins = $inputs->can_read(1); # child fds with some output available
my $rc = @readyins;
my $readyh;
foreach $readyh (@readyins) {
my $cursel = new IO::Select; # need to do non-blocking reads on this fd
$cursel->add($readyh);
while ($cursel->can_read(0)) {
my $line = <$readyh>;
unless ($line) {
$inputs->remove($readyh);
close($readyh);
$exitcodes{$nodehdl{$readyh}} = $? >> 8;
$children--;
next;
}
chomp($line);
if ($::RETRY && ($line =~ /Connection link error/i) ) {
if ($::VERBOSE) { print "Need to retry $nodehdl{$readyh}\n"; }
push @retries, $nodehdl{$readyh};
} elsif ($::DONOTFILTER || ($line!~/IBM Advanced Settings Utility version/i &&
$line!~/Licensed Materials - Property of IBM/i &&
$line!~/\(C\) Copyright IBM Corp. \d+-\d+ All Rights Reserved/i &&
$line!~/Connected to IMM at IP address/i )) {
print $nodehdl{$readyh}.": ".$line."\n";
}
}
}
no strict 'subs';
IO::Handle::flush(stdout);
use strict 'subs';
yield; #Explicitly give all children a chance to refill any buffers
return $rc;
}
# Fork the asu cmd for 1 node
sub asunode {
my $out = shift; # this is a reference to the child file descriptor
my $node = shift;
my $bmc = shift;
my $username = shift;
my $passwd = shift;
my $batchfile = shift;
my $args;
if ($batchfile) {
$args = "batch $batchfile";
} else {
foreach my $a (@_) { $args .= ' ' . xCAT::Utils->quote($a); }
}
my $cmd = "$::asucmd $args --host $bmc --user $username --password $passwd 2>&1 |";
if ($::VERBOSE) { print "forking $cmd\n"; }
my $pid = open($$out, $cmd);
$pids{$pid} = $node;
}
# Contact xcatd to expand the noderange into a list of nodes
sub expandnoderange {
my $noderange = shift @_;
my @nodes;
my @user = getpwuid($>);
my $homedir=$user[7];
my $client = IO::Socket::SSL->new(
PeerAddr=>$xcathost,
SSL_key_file=>$homedir."/.xcat/client-cred.pem",
SSL_cert_file=>$homedir."/.xcat/client-cred.pem",
SSL_ca_file => $homedir."/.xcat/ca.pem",
SSL_use_cert => 1,
#SSL_verify_mode => 1,
);
die "Connection failure: $!\n" unless ($client);
#todo: get the bmc attr for each node, not the node name itself
my %cmdref = (command => 'noderange', noderange => $noderange);
$SIG{ALRM} = sub { die "No response getting noderange" };
alarm(15);
my $msg = XMLout(\%cmdref,RootName=>'xcatrequest', NoAttr=>1, KeyAttr => []);
if ($ENV{XCATXMLTRACE}) { print $msg; }
print $client $msg;
alarm(15);
my $response="";
while (<$client>) {
alarm(0);
$response .= $_;
if ($response =~ m/<\/xcatresponse>/) {
if ($ENV{XCATXMLTRACE}) { print $response; }
my $rsp=XMLin($response, ForceArray => ['node']);
$response='';
if ($rsp->{warning}) {
printf "Warning: ".$rsp->{warning}."\n";
}
if ($rsp->{error}) {
die ("ERROR: ".$rsp->{error}."\n");
} elsif ($rsp->{node}) {
@nodes=@{$rsp->{node}};
}
if ($rsp->{serverdone}) {
last;
}
}
}
close($client);
if ($::VERBOSE) { print 'Nodes:', join(',',@nodes), "\n"; }
return @nodes;
}
# Contact xcatd to get from the ipmi table for this list of nodes: bmc, username, password
sub getipmiattrs {
my $noderange = shift @_;
my $nodeattrs; # this will be a reference to a hash
my @user = getpwuid($>);
my $homedir=$user[7];
my $client = IO::Socket::SSL->new(
PeerAddr=>$xcathost,
SSL_key_file=>$homedir."/.xcat/client-cred.pem",
SSL_cert_file=>$homedir."/.xcat/client-cred.pem",
SSL_ca_file => $homedir."/.xcat/ca.pem",
SSL_use_cert => 1,
#SSL_verify_mode => 1,
);
die "Connection failure: $!\n" unless ($client);
my %cmdref = (command => 'getNodesAttribs', noderange => $noderange, table => 'ipmi');
push (@{$cmdref{attr}}, qw(bmc username password));
$SIG{ALRM} = sub { die "No response getting ipmi attributes" };
alarm(15);
my $msg = XMLout(\%cmdref,RootName=>'xcatrequest', NoAttr=>1, KeyAttr => []);
if ($ENV{XCATXMLTRACE}) { print $msg; }
print $client $msg;
alarm(15);
my $response="";
while (<$client>) {
alarm(0);
$response .= $_;
if ($response =~ m/<\/xcatresponse>/) {
if ($ENV{XCATXMLTRACE}) { print $response; }
my $rsp=XMLin($response, ForceArray => ['node']);
$response='';
if ($rsp->{warning}) {
printf "Warning: ".$rsp->{warning}."\n";
}
if ($rsp->{error}) {
die ("ERROR: ".$rsp->{error}."\n");
} elsif ($rsp->{node}) {
#print Dumper($rsp->{node});
$nodeattrs=$rsp->{node}; # this is reference to a hash, each key is the nodename and the value is a reference to a hash of attr values
}
if ($rsp->{serverdone}) {
last;
}
}
}
close($client);
#if ($::VERBOSE) { print 'Nodes:', join(',',$nodeattrs), "\n"; }
return $nodeattrs;
}
# Contact xcatd to get the default user/pw for ipmi in the xcat passwd table
sub getdefaultcredentials {
my @data;
my @user = getpwuid($>);
my $homedir=$user[7];
my $client = IO::Socket::SSL->new(
PeerAddr=>$xcathost,
SSL_key_file=>$homedir."/.xcat/client-cred.pem",
SSL_cert_file=>$homedir."/.xcat/client-cred.pem",
SSL_ca_file => $homedir."/.xcat/ca.pem",
SSL_use_cert => 1,
#SSL_verify_mode => 1,
);
die "Connection failure: $!\n" unless ($client);
#todo: use lissas new db api instead
my %cmdref = (command => 'tabdump', arg => 'passwd');
#push (@{$cmdref->{arg}}, 'passwd');
$SIG{ALRM} = sub { die "No response getting userid and password" };
alarm(15);
my $msg = XMLout(\%cmdref,RootName=>'xcatrequest', NoAttr=>1, KeyAttr => []);
if ($ENV{XCATXMLTRACE}) { print $msg; }
print $client $msg;
alarm(15);
my $response="";
while (<$client>) {
alarm(0);
$response .= $_;
if ($response =~ m/<\/xcatresponse>/) {
if ($ENV{XCATXMLTRACE}) { print $response; }
my $rsp=XMLin($response, ForceArray => ['node']);
$response='';
if ($rsp->{warning}) {
printf "Warning: ".$rsp->{warning}."\n";
}
if ($rsp->{error}) {
die ("ERROR: ".$rsp->{error}."\n");
} elsif ($rsp->{data}) {
@data=@{$rsp->{data}};
}
if ($rsp->{serverdone}) {
last;
}
}
}
close($client);
# go thru the data lines and find the ipmi row
my ($user, $pw);
foreach my $d (@data) {
#if ($::VERBOSE) { print "$d\n"; }
my @cols = split(',', $d);
if ($cols[0] eq '"ipmi"') {
($user) = $cols[1] =~ /"(.*)"/;
($pw) = $cols[2] =~ /"(.*)"/;
last;
}
}
if (!defined($user) || !defined($pw)) { die "Did not find the ipmi username and password in the xCAT passwd table.\n"; }
return ($user, $pw);
}
# vim: set et ts=2 sts=2 sw=2 :