From 8718bbc012fe1fb1604d396242bc58a3a961e773 Mon Sep 17 00:00:00 2001 From: wanghuaz Date: Wed, 7 Jan 2009 14:16:55 +0000 Subject: [PATCH] Add IB support in xCAT. configiba is used to configure secondary adapter as a sample post script. annotatelog is to gather and parse switch log and subnet manager log. Please refer to the READMEs for usage and more details. git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2575 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-server/share/xcat/ib/scripts/annotatelog | 1071 +++++++++++++++++ .../share/xcat/ib/scripts/annotatelog.README | 245 ++++ xCAT-server/share/xcat/ib/scripts/configiba | 88 ++ .../share/xcat/ib/scripts/configiba.README | 68 ++ xCAT-server/share/xcat/ib/scripts/getGuids | 653 ++++++++++ .../share/xcat/ib/scripts/getGuids.README | 69 ++ xCAT-server/xCAT-server.spec | 2 + 7 files changed, 2196 insertions(+) create mode 100644 xCAT-server/share/xcat/ib/scripts/annotatelog create mode 100644 xCAT-server/share/xcat/ib/scripts/annotatelog.README create mode 100644 xCAT-server/share/xcat/ib/scripts/configiba create mode 100644 xCAT-server/share/xcat/ib/scripts/configiba.README create mode 100644 xCAT-server/share/xcat/ib/scripts/getGuids create mode 100644 xCAT-server/share/xcat/ib/scripts/getGuids.README diff --git a/xCAT-server/share/xcat/ib/scripts/annotatelog b/xCAT-server/share/xcat/ib/scripts/annotatelog new file mode 100644 index 000000000..882408827 --- /dev/null +++ b/xCAT-server/share/xcat/ib/scripts/annotatelog @@ -0,0 +1,1071 @@ +#!/usr/bin/perl +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# +# +# Licensed Materials - Property of IBM +# +# (C) COPYRIGHT International Business Machines Corp. 2008 +# All Rights Reserved +# +# US Government Users Restricted Rights - Use, duplication or +# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
+# +# IBM_PROLOG_END_TAG + + +########################################################################### +# # +# Command: annotatelog # +# # +#-------------------------------------------------------------------------# +# This xCAT script is used to parse the QLogic log entries in file +# /var/log/xcat/errorlog/[xCAT Management Nodes] on CSM Management +# Node by subnet manager, IB node, chassis, FRU(Field-Replaceable Unit) or a +# particular node. This script is supported by both AIX and Linux MN. +# From xCAT's point of view, the log to analyze must be xCAT +# consolidated log, which means this log file must come from xCAT +# syslog/errorlog monitoring mechanism, such as +# /var/log/xcat/errorlog/[xCAT Management Nodes] file. Since the log format +# is various, xCAT do not support other log files. +# Command Syntax: +# annotatelog -f log_file [-s start_time] [-e end_time] +# { [-i -g guid_file -l link_file] [-S] [-c] [-u]| [-a -g guid_file -l link_file]} +# {[-n node_list -g guid_file] [-E]} +# [-h] +# +# -A Output the combination of -i, -S, -c and -u. It should be used with -g and -l flags. +# -f log_file +# Specifies a log file fullpath name to analyze. +# Must be xCAT consolidated log got from Qlogic HSM or ESM. +# -s start_time +# Specifies the start time for analysis, where the start_time +# variable has the format ddmmyyhh:mm:ss (day, month, year, +# hour, minute, and second), 00:00:00 is valid. +# -e end_time +# Specifies the end time for analysis, where the end_time +# variable has the format ddmmyyhh:mm:ss (day, month, year, +# hour, minute, and second), 00:00:00 is valid. +# -l link_file +# Specifies a link file fullpath name, which concatenates all +# '/var/opt/iba/analysis/baseline/fabric*links' files from all fabric management nodes. +# -g guid_file +# Specifies a guid file fullpath name, which has a list of +# GUIDs as obtained from the "getGuids" script. +# -E Annotate with node ERRLOG_ON and ERRLOG_OFF information. 
This +# can help determine if a disappearance was caused by a node +# disappearing. It is for AIX nodes only and should be used with -x or -n flag. +# -S Sort the log entries by subnet manager only. +# -i Sort the log entries by IB node only. +# -c Sort the log entries by chassis only. +# -u Sort the log entries by FRU only. +# -n node_list +# Specifies a comma-separated list of node host names, IP addresses to look up in log entries. +# -h Display usage information. +# Exit codes: +# 0 - success +# 1 - fail +########################################################################### + +use strict; +use Getopt::Long; +use Time::Local; + +#Maps between HCA adapter and connector +%::HCAMAP = ( + "iba0_lsw1" => "C65-T1 (HV=Cx-T1)", "iba0_lsw2" => "C65-T2 (HV=Cx-T2)", + "iba1_lsw1" => "C65-T3", "iba1_lsw2" => "C65-T4", + "iba2_lsw1" => "C66-T1", "iba2_lsw2" => "C66-T2", + "iba3_lsw1" => "C66-T3", "iba3_lsw2" => "C66-T4", + "ehca0_lsw1" => "C65-T1 (HV=Cx-T1)", "ehca0_lsw2" => "C65-T2 (HV=Cx-T2)", + "ehca1_lsw1" => "C65-T3", "ehca1_lsw2" => "C65-T4", + "ehca2_lsw1" => "C66-T1", "ehca2_lsw2" => "C66-T2", + "ehca3_lsw1" => "C66-T3", "ehca3_lsw2" => "C66-T4", +); + +$::OK = 0; +$::NOK = 1; +$::SHSEP = "###############################################################################"; +$::DASEP = "-------------------------------------------------------------------------------"; + +#Logs sorted by kinds of method +%::SMLOGS; +%::FRULOGS; +%::CHASSISLOGS; +%::NODELOGS; +%::NODELOGSBYNAME; +%::OTHERLOGS; +#The start time and end time of logs that will be analyzed +$::Start_Epoch; +$::End_Epoch; +#Record the relationship between guid and it's information +%::GUIDS; +#Record links between HCA and Switch and links between Switch and Swtich +%::HCALinks; +#ERRLOG_ON and ERRLOG_OFF information +%::ELOGON; +%::ELOGOFF; +#Information related with a node +%::NodeInfo; +#List of nodes that their logs will be analyzed +@::NodeList; + +# MAIN Main main# +&getArgs(); +&getLogs($::LOG_FILE); 
&outputLogs();
exit $::OK;

#--------------------------------------------------------------------------------

=head3 getArgs
    Parse the command line, validate the flag combinations and load the
    values the rest of the script works from.

    Notes:
      Results are stored in package globals: the option variables
      ($::LOG_FILE, $::ALL, $::SM, ...), @::NodeList, %::GUIDS,
      %::HCALinks, $::Start_Epoch and $::End_Epoch.
      On an invalid combination a message is printed, usage() is called
      and the script exits with $::NOK.  With -h, usage() is called and
      the script exits with $::OK.
      When none of -S, -i, -c, -u or -n is given, -A is assumed.
=cut

#--------------------------------------------------------------------------------
sub getArgs()
{
    # Option letters are case-sensitive: -s/-S and -e/-E are different flags.
    $Getopt::Long::ignorecase = 0;
    if (
        !GetOptions(
            'h'   => \$::HELP,
            'A'   => \$::ALL,
            'f=s' => \$::LOG_FILE,
            's=s' => \$::START_TIME,
            'e=s' => \$::END_TIME,
            'g=s' => \$::GUIDS_FILE,
            'l=s' => \$::LINKS_FILE,
            'E'   => \$::ERRLOG,
            'S'   => \$::SM,
            'i'   => \$::IBNODE,
            'c'   => \$::CHASSIS,
            'u'   => \$::FRU,
            'n=s' => \$::NODE_LIST
        )
      )
    {
        usage();
        exit $::NOK;
    }
    if ( $::HELP == 1 )
    {
        usage();
        exit $::OK;
    }

    # Report an invalid flag combination: message on its own line, then the
    # usage text, then a failing exit status.
    my $reject = sub {
        my ($message) = @_;
        print "$message\n";
        usage();
        exit $::NOK;
    };

    if ( $::LOG_FILE eq "" )
    {
        $reject->("Please use \"-f\" to specify a log file.");
    }

    if ( $::SM == 0 && $::IBNODE == 0 && $::CHASSIS == 0 && $::FRU == 0 && $::NODE_LIST eq "")
    {
        #If there is no flag specified, -A is default
        $::ALL = 1;
    }

    if ( $::ALL == 1 && ($::SM == 1 || $::IBNODE == 1 || $::CHASSIS == 1 || $::FRU == 1))
    {
        $reject->("The flag -i, -S, -c or -u should not be used with -A.");
    }

    if ( ($::ALL == 1 || $::IBNODE == 1) && ( $::GUIDS_FILE eq "" || $::LINKS_FILE eq ""))
    {
        $reject->("The -g flag and -l flag should be used with -A or -i flag.");
    }

    if ( $::ERRLOG == 1 && ( $::IBNODE == 0 && $::ALL == 0 && $::NODE_LIST eq "") )
    {
        $reject->("The -E flag should be used with -n or -i or -A flag.");
    }

    if ( $::NODE_LIST ne "" && $::GUIDS_FILE eq "" )
    {
        $reject->("The -n flag should be used with -g flag.");
    }

    if ( $::NODE_LIST ne "")
    {
        @::NodeList = split /,/, $::NODE_LIST;

        # Node names as reported by the nodels command, one per line
        my @AllNodes = `nodels`;
        chomp @AllNodes;

        my @NotDefinedNodes;
        # $node aliases the @::NodeList element, so an IP address given on the
        # command line is replaced in the list by the host name it resolves to.
        foreach my $node (@::NodeList)
        {
            #If there are IP addresses in NodeList, convert IP to hostname first
            if (isIpaddr($node))
            {
                my ($hostname, $ip) = getHost($node);
                $node = $hostname;
            }

            # A requested node is accepted when its name occurs in a name
            # printed by nodels.  \Q..\E keeps the dots (and any other regex
            # metacharacters) in the requested name literal.
            if (!(grep { $_ =~ /\Q$node\E/ } @AllNodes))
            {
                push (@NotDefinedNodes, $node);
                next;
            }
        }
        if (scalar(@NotDefinedNodes))
        {
            my $badnodes = join ",", @NotDefinedNodes;
            print "Could not find node(s) $badnodes in the node database. Run nodels to see the list of valid node names.\n";
            exit $::NOK;
        }
    }

    if ( $::GUIDS_FILE ne "" ) { %::GUIDS    = getGUIDs($::GUIDS_FILE); }
    if ( $::LINKS_FILE ne "" ) { %::HCALinks = getHCALinks($::LINKS_FILE); }

    # No -s means "from the beginning": epoch 0.
    if ( $::START_TIME ne "" )
    {
        $::Start_Epoch = epochTime($::START_TIME);
    }
    else
    {
        $::Start_Epoch = 0;
    }

    # No -e means "no upper bound": left undefined.
    if ( $::END_TIME ne "" )
    {
        $::End_Epoch = epochTime($::END_TIME);
    }
    else
    {
        undef $::End_Epoch;
    }
}

#--------------------------------------------------------------------------------

=head3 isIpaddr

    returns 1 if parameter has a valid IP address form.

    Arguments:
        dot qualified IP address: e.g. 1.2.3.4
    Returns:
        1 - if legal IP address
        0 - if not legal IP address.
    Globals:
        none
    Error:
        none
    Example:
        if ($ipAddr) { blah; }
    Comments:
        Doesn't test if the IP address is on the network,
        just tests its form.

=pod

=cut

#--------------------------------------------------------------------------------
sub isIpaddr
{
    # This script calls isIpaddr($addr) as a plain function, but the sub was
    # written for the class-method form Class->isIpaddr($addr).  Accept both:
    # with two or more arguments the first one is the invocant.
    my $addr = @_ > 1 ? $_[1] : $_[0];

    return 0 unless defined $addr;

    # Four dot-separated decimal fields and nothing else
    my @octets = $addr =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
    return 0 unless @octets;

    # The first field may not be 0 and no field may exceed 255
    if ($octets[0] > 255 || $octets[0] == 0 || $octets[1] > 255 || $octets[2] > 255 || $octets[3] > 255)
    {
        return 0;
    }
    return 1;
}

#--------------------------------------------------------------------------------

=head3 getHost

    Return the primary hostname and IP address for the given hostname or
    IP address.

    Arguments:
        A string: either an IP address or a hostname.  May be called as
        getHost($node) or in class-method form, Class->getHost($node).
    Returns:
        ($nodename, $nodeip) as strings.
        If an IP address has no reverse mapping, the address itself is
        returned as the name.  If a hostname cannot be resolved, the name
        is returned unchanged and $nodeip is undef.
    Example:
        my ($nodename, $nodeip) = getHost($node);
    Comments:
        none

=cut

#--------------------------------------------------------------------------------

# inet_aton, inet_ntoa and AF_INET are not Perl built-ins; they come from Socket.
use Socket qw(inet_aton inet_ntoa AF_INET);

sub getHost
{
    # Same dual call convention as isIpaddr: function style or class-method style.
    my $arg = @_ > 1 ? $_[1] : $_[0];

    my ($hostname, $ipaddr);
    if (isIpaddr($arg))
    {
        $ipaddr = $arg;
        my $packedaddr = inet_aton($ipaddr);
        $hostname = gethostbyaddr($packedaddr, AF_INET);
        if (!$hostname)
        {
            # No reverse mapping: fall back to the address itself
            $hostname = $ipaddr;
        }
    }
    else    # they specified a hostname
    {
        $hostname = $arg;    # this may be a short hostname
        my ($name, $aliases, $addrtype, $length, @addrs) =
          gethostbyname($hostname);

        # Only translate when the lookup succeeded; inet_ntoa dies on undef.
        if (@addrs)
        {
            $ipaddr   = inet_ntoa($addrs[0]);
            $hostname = $name;    # they may have specified a shorter or non-primary name
        }
    }

    # NOTE(review): the original ended with a dangling
    # "if $ipaddr =~ /127\.0\.0/;" that had no statement in front of it.
    # Presumably a diagnostic for names resolving to loopback was intended;
    # nothing is done for that case here -- confirm the intended action.
    return ($hostname, $ipaddr);
}

#--------------------------------------------------------------------------------

=head3 getLogs
    Parse the logs
    Notes:
Arguments:
    $logfile - The file name of log file
Returns:

=cut

+#-------------------------------------------------------------------------------- +sub getLogs +{ + my ($logfile) = @_; + my $tsepoch; + unless (open(LOGFILE, $logfile)) + { + print "Can't open '$logfile':\n\t$!"; + exit $::NOK; + } + + foreach my $line () + { + chomp $line; + $tsepoch = 0; + # only work with local6 logs + #if ( $line =~ /local6/ ) { + # parse up log entries + if ($line =~ /SM:.*COND:#12/ and ($::ALL == 1 || $::SM == 1)) + { + my ($ts, $level, $sm, $cond, $node, $linkedto, $detail); + #Parse a line of logs + #The DETAIL is not always there + if ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)\|DETAIL:(.*)/ ) + { + ($ts, $level, $sm, $cond, $node, $linkedto, $detail) = ($1, $2, $3, $4, $5, $6, $7); + } + elsif ($line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|DETAIL:(.*)/) + { + ($ts, $level, $sm, $cond, $node, $detail) = ($1, $2, $3, $4, $5, $6); + } + + $tsepoch = epochTime($ts) if($ts); + #Check the time of this log is in the period specified + if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) + { + $::SMLOGS{$sm} = "$::SMLOGS{$sm}\n$line"; + } + next; + } + + if ($line =~ /CHASSIS:/ and ($::ALL == 1 || $::CHASSIS == 1 || $::FRU == 1)) + { + my ($ts, $level, $chassis, $cond, $FRU, $pn, $details); + if ( $line =~ /(.........:..:..).*MSG:(\w*)\|CHASSIS:(.*)\|COND:(.*)\|FRU:(.*)\|PN:(.*)\|DETAIL:(.*)/ ) + { + ($ts, $level, $chassis, $cond, $FRU, $pn, $details) = ($1, $2, $3, $4, $5, $6, $7); + } + elsif ($line =~ /(.........:..:..).*MSG:(\w*)\|CHASSIS:(.*)\|COND:(.*)\|FRU:(.*)\|PN:(.*)/) + { + ($ts, $level, $chassis, $cond, $FRU, $pn) = ($1, $2, $3, $4, $5, $6); + } + + $tsepoch = epochTime($ts) if($ts); + if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) + { + $FRU = "$chassis - $FRU"; + $::FRULOGS{$FRU} = "$::FRULOGS{$FRU}\n$line" if ($::ALL == 1 || $::FRU == 1); + $::CHASSISLOGS{$chassis} = 
"$::CHASSISLOGS{$chassis}\n$line" if ($::ALL == 1 || $::CHASSIS == 1); + } + next; + } + + if ($line =~ /SM:/ && ($::ALL == 1 || $::SM == 1 || $::IBNODE == 1 || $::NODE_LIST ne "")) + { + my $guid; + my $port; + my $guidref; + my $realguid; + my $lsw; + my $lswref; + my $hcaphys; + my $host; + my $elog_entry; + my ($ts, $level, $sm, $cond, $node, $linkedto, $detail); + if ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)\|DETAIL:(.*)/) + { + ($ts, $level, $sm, $cond, $node, $linkedto, $detail) = ($1, $2, $3, $4, $5, $6, $7); + } + elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)/ ) + { + ($ts, $level, $sm, $cond, $node, $linkedto) = ($1, $2, $3, $4, $5, $6); + } + elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|DETAIL:(.*)/ ) + { + ($ts, $level, $sm, $cond, $node, $detail) = ($1, $2, $3, $4, $5, $6); + } + elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)/ ) + { + ($ts, $level, $sm, $cond, $node) = ($1, $2, $3, $4, $5); + } + if ( $sm eq "") { $sm = "Other"; } + + $tsepoch = epochTime($ts) if($ts); + if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) + { + if ($node =~ /IBM .*Logical/ && ($::ALL == 1 || $::IBNODE == 1 || $::NODE_LIST ne "")) + { + $guid = $node; + $guid =~ s/.*:0x//; + $port = $node; + $port =~ s/.*port //; + $port =~ s/:.*//; + if ( $node =~ /IBM .*Logical Switch/ ) + { + #It's for Logical Switch + $realguid = $guid; + $guid =~ s/..$/00/; + $node =~ /.*Switch (\d):.*/; + $lsw = $1; + #The key for HCALinks to find the next link of this Logical Switch + $guidref = "$realguid:1"; + } + else + { + #It's for Logical HCA + $::GUIDS{$guid}{type} =~ /.*(\d+)/; + $lsw = $1; + #The key for HCALinks to find the linked Logical Switch of this Logical HCA + $guidref = "$guid:$port"; + $lswref = $::HCALinks{$guidref}; + $lswref =~ /.*GUID\s*0x(\w+).*/; + $lswref = $1; + #The 
key for HCALinks to find the next link of the Logical Switch + $lswref = "$lswref:1"; + } + my $hcadev = $::GUIDS{$guid}{dev}; + my $hcalu = "${hcadev}_lsw${lsw}"; + #Find the connector + $hcaphys = $::HCAMAP{$hcalu}; + + $host = $::GUIDS{$guid}{host}; + #If this it a log for HCA, the guid can be translated to a hostname using GUIDS map + $node = "$host :port $port:0x$guid" if($host); + if ( $::NODE_LIST ne "" ) + { + if ( grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList ) + { + $::NODELOGSBYNAME{$host} = "$::NODELOGSBYNAME{$host}\n$line"; + } + } + #If -E is specified, get the ERRLOG_ON and ERRLOG_OFF information + if ( $::ERRLOG == 1 && $host && ($::ALL == 1 || $::IBNODE == 1 || ($::NODE_LIST ne "" && grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList))) + { + if ( ! exists $::ELOGON{$host}) + { + my $ret = getERRLOG($host); + ($::ELOGON{$host}, $::ELOGOFF{$host}) = split/[_]/, $ret; + } + $elog_entry = "\n# ERRLOG_ON: $::ELOGON{$host}\n# ERRLOG_OFF: $::ELOGOFF{$host}"; + } + if ($::ALL == 1 || $::IBNODE == 1) + { + $::HCALinks{$guidref} =~ /(.+)\s*port.*/; + my $swname = $1; + my $lswn = $::HCALinks{$guidref} ? $::HCALinks{$guidref} : "Not Found"; + my $swn = $::HCALinks{$lswref} ? $::HCALinks{$lswref} : "Not Found"; + + if ( (! exists $::NodeInfo{$node}) && $node) + { + if ( $node =~ /IBM .*Logical Switch/ ) + { + $::NodeInfo{$node} = "- <-> $lswn"; + } + else + { + $::NodeInfo{$node} = "- $::GUIDS{$guid}{dev} <-> $lswn\n" . + "- Connector is $hcaphys\n" . + "- $swname<-> $swn"; + if ( ($::ALL == 1 || $::IBNODE == 1) && $::ERRLOG == 1 ) + { + $::NodeInfo{$node} = $::NodeInfo{$node} . 
"\n$::DASEP\n# $host$elog_entry"; + } + } + } + } + if ($::ERRLOG == 1 && $host && $::NODE_LIST ne "" && (grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList)) + { + $::NodeInfo{$host} = "$elog_entry"; + } + } + + $::SMLOGS{$sm} = "$::SMLOGS{$sm}\n$line" if ($::ALL == 1 || $::SM == 1); + $::NODELOGS{$node} = "$::NODELOGS{$node}\n$line" if ($::ALL == 1 || $::IBNODE == 1); + } + next; + } + if ($line =~ /.*/ and ($::ALL == 1)) + { + $line =~ /(.........:..:..).*/; + my $tsepoch = epochTime($1) if($1); + if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($1 && $tsepoch <= $::End_Epoch)) ) + { + $::OTHERLOGS{"others"} = "$::OTHERLOGS{\"others\"}\n$line"; + } + } + #} + } + close LOGFILE; + return; +} + +#-------------------------------------------------------------------------------- + +=head3 outputLogs + Output the results of logs analysis + Notes: +=cut + +#-------------------------------------------------------------------------------- +sub outputLogs +{ + my $SMLOGHDR = "Logs by Subnet Manager"; + my $SMLOGSECT = "Reported by subnet manager: "; + my $IBNODEHDR = "Logs by IB node"; + my $IBNODESECT = "Reported by IB node: "; + my $IBNODEHDRBYNAME = "Logs for special nodes"; + my $CHLOGHDR = "Logs by CHASSIS"; + my $CHLOGSECT = "Reported by chassis: "; + my $FRULOGHDR = "Logs by FRUS from CHASSIS"; + my $FRULOGSECT = "Associated with FRU: "; + my $OTLOGHDR = "Logs by Others"; + my $OTLOGSECT = ""; + + print < 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, + "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, + "Nov" => 10, "Dec" => 11, + ); + + if ( length($timestring) > 1 ) + { + #If the time string is not of ddmmyyhh:mm:ss format, it will be a time stamp of a log + if ( $timestring =~ /\S+\s+\d+\s+\d+:\d+:\d+/ ) + { + my $mont = substr $timestring, 0, 3; + my $dayt = substr $timestring, 4, 2; + my $time = substr $timestring, 7; + if (exists $themon{$mont}) + { + $mont = $themon{$mont} + 1; + } + my ($psec, $pmin, $phour, $pmday, $pmon, 
$pyear, $pwday, $pyday, $pisdst) = localtime(); + $pyear = $pyear + 1900; + my $year = substr $pyear, 2, 2; + if ($dayt < 10) { $dayt =~ s/ /0/g; } + if ($mont < 10) { $mont = "0$mont"; } + #Format the timestring to ddmmyyhh:mm:ss + $timestring = "$dayt$mont$year$time"; + } + } + + if ( ! ($timestring =~ /^\d+:\d+:\d+$/) ) + { + print "Invalid time '$ots'\n"; + exit $::NOK; + } + + my $day = substr $timestring, 0, 2; + if ( !($day =~ /^\d+$/) || $day < 1 || $day > 31 ) + { + $formaterror = 1; + } + my $mon = substr $timestring, 2, 2; + if ( !($mon =~ /^\d+$/) || $mon < 1 || $mon > 12 ) + { + $formaterror = 1; + } + $mon = $mon - 1; + my $year = substr $timestring, 4, 2; + if ( !($year =~ /^\d+$/) ) + { + $formaterror = 1; + } + $year = $year - 1900; + if ( $year < 0 ) { $year = $year + 2000 } + if ( $year > 138 ) { $formaterror = 1; } + my $hour = substr $timestring, 6, 2; + if ( !($hour =~ /^\d+$/) || $hour < 0 || $hour > 23 ) + { + $formaterror = 1; + } + my $min = substr $timestring, 9, 2; + if ( !($min =~ /^\d+$/) || $min < 0 || $min > 59 ) + { + $formaterror = 1; + } + my $sec = substr $timestring, 12, 2; + if ( !($sec =~ /^\d+$/) || $sec < 0 || $sec > 59 ) + { + $formaterror = 1; + } + if($formaterror) + { + print "Invalid time '$ots'\n"; + exit $::NOK; + } + $epoch = timegm($sec, $min, $hour, $day, $mon, $year); + + return ($epoch); +} + +#-------------------------------------------------------------------------------- + +=head3 usage + + usage for annotatelog + +=cut + +#-------------------------------------------------------------------------------- +sub usage +{ + + print <) + { + chomp $line; + if ($line =~ /(\S*): (\S*): (\S*): (\w*$)/) + { + my ($host, $dev, $type, $guid) = ($1, $2, $3, $4); + $guids{$guid}{host} = $host; + $guids{$guid}{dev} = $dev; + $guids{$guid}{type} = $type; + } + } + close GUIDSFILE; + return %guids; +} + +#-------------------------------------------------------------------------------- + +=head3 getHCALinks + Get links between 

=pod

    HCA and Switch and links between Switch and Switch
    Notes:
      A line that mentions an "IBM ... Logical" device and carries
      "g 0x<guid> <port>" opens a link; the line that follows it names the
      device on the other end of that link.
Arguments:
    $hcalfile - A link file fullpath name, which concatenates all '/var/opt/iba/analysis/baseline/fabric*links' files from all fabric management nodes.
Returns:
    %hcalinks - A hash table recording links.  Keys are "<guid>:<port>"
                (guid without the 0x prefix); values are
                "<name> port <port> (GUID 0x<guid>)" for the linked device.
=cut

#--------------------------------------------------------------------------------

sub getHCALinks
{
    my ($hcalfile) = @_;
    my $getit   = 0;     # 1 while the next line is expected to describe the peer
    my $hcaport = "";    # "<guid>:<port>" key of the link currently being read
    my %hcalinks;

    # Three-argument open with a lexical handle: the path comes from the -l
    # flag and must only ever be treated as a file to read.
    my $links_fh;
    unless (open($links_fh, '<', $hcalfile))
    {
        print "Can't open HCA Links file '$hcalfile':\n\t$!\n";
        exit $::NOK;
    }

    # Read line by line instead of loading the whole file into a list.
    while (my $line = <$links_fh>)
    {
        if ( $getit == 1 )
        {
            # Peer line: GUID, port number, device type, device name
            if ($line =~ /.*0x(\S*)\s+(\d+)\s+(\w+)\s+(.*)/)
            {
                my ($guid, $port, $type, $name) = ($1, $2, $3, $4);
                $hcalinks{$hcaport} = "$name port $port (GUID 0x$guid)";
                $getit = 0;
            }
        }
        if ($line =~ /g.*IBM.*Logical/)
        {
            if ($line =~ /.*g 0x(\S*) *(\d*) .*/)
            {
                my $port;
                ($hcaport, $port) = ($1, $2);
                $hcaport = "$hcaport:$port";
                $getit = 1;
            }
        }
        else
        {
            $getit = 0;
        }
    }
    close $links_fh;

    # The original followed the loop with a debug dump that iterated over the
    # global %::HCALinks rather than %hcalinks.  In this script that global is
    # still empty when getHCALinks runs, so the dump printed nothing; it has
    # been removed.
    return %hcalinks;
}

#--------------------------------------------------------------------------------

=head3 checkDshReachability

    Notes: Check the dsh reachability between the Management Nodes
    and node.

Arguments:
    $node - the remote node hostname.

Returns:
    $::OK - The remote node is reachable through dsh.
    $::NOK - The remote node is unreachable through dsh.
+ +=cut + +#-------------------------------------------------------------------------------- +sub checkDshReachability() +{ + my ($node) = @_; + my $output = `dsh -Q -n $node date 2>/dev/null`; + return $?; +} diff --git a/xCAT-server/share/xcat/ib/scripts/annotatelog.README b/xCAT-server/share/xcat/ib/scripts/annotatelog.README new file mode 100644 index 000000000..ae9a07c8f --- /dev/null +++ b/xCAT-server/share/xcat/ib/scripts/annotatelog.README @@ -0,0 +1,245 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# +# +# Licensed Materials - Property of IBM +# +# (C) COPYRIGHT International Business Machines Corp. 2008 +# All Rights Reserved +# +# US Government Users Restricted Rights - Use, duplication or +# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. +# +# IBM_PROLOG_END_TAG + +annotatelog.README + +This README describes how to use the annotatelog script. + +The syntax of the annotatelog command is: + +annotatelog -f log_file [-s start_time] [-e end_time] + { [-i -g guid_file -l link_file] [-S] [-c] [-u]| [-a -g guid_file -l link_file]} + {[-n node_list -g guid_file] [-E]} + [-h] + + -A Output the combination of -i, -S, -c and -u. It should be used with -g and -l flags. + -f log_file + Specifies a log file fullpath name to analyze. + Must be xCAT consolidated log got from Qlogic HSM or ESM. + -s start_time + Specifies the start time for analysis, where the start_time + variable has the format ddmmyyhh:mm:ss (day, month, year, + hour, minute, and second), 00:00:00 is valid. + -e end_time + Specifies the end time for analysis, where the end_time + variable has the format ddmmyyhh:mm:ss (day, month, year, + hour, minute, and second), 00:00:00 is valid. + -l link_file + Specifies a link file fullpath name, which concatenates all + '/var/opt/iba/analysis/baseline/fabric*links' files from all fabric management servers. 
+ -g guid_file + Specifies a guid file fullpath name, which has a list of + GUIDs as obtained from the "getGuids" script. + -E Annotate with node ERRLOG_ON and ERRLOG_OFF information. This + can help determine if a disappearance was caused by a node + disappearing. It is for AIX nodes only and should be used with -i or -n flag. + -S Sort the log entries by subnet manager only. + -i Sort the log entries by IB node only. + -c Sort the log entries by chassis only. + -u Sort the log entries by FRU only. + -n node_list + Specifies a comma-separated list of node host names or IP addresses to look up in log entries. + -h Display usage information. + +In an xCAT cluster with IB QLogic switches, the switch logs and subnet +manager (ESM/HSM) logs will use the syslog protocol for log redirection; +they are redirected to the xCAT Management Node. The xCAT Management Node syslogd recognizes the facility (local6) and priority (NOTICE and above) and puts the log +entries into a file/FIFO that is being monitored by AIXSyslogSensor on +AIX system or ErrorLogSensor on Linux system. The condition-response +setup on xCAT Management Node local will move the log entries to file +/var/log/xcat/errorlog/[xCAT Management Node]. So there are a lot of +entries in this log file and it is difficult for the administrator to look through. + +annotatelog is a sample script to parse the QLogic log entries in file +/var/log/xcat/errorlog/[xCAT Management Node] on xCAT Management Server +by subnet manager, IB node, chassis, FRU(Field-Replaceable Unit) or a +particular node. This script is supported by both AIX and Linux Management +Node. From xCAT's point of view, the log to analyze must be xCAT +consolidated log, which means this log file must come from xCAT +syslog/errorlog monitoring mechanism, such as /var/log/xcat/errorlog/[xCAT +Management Node] file. Because log formats vary, xCAT does not +support other log files. 
+ +This script provides several flags to specify the category criteria, +they are -S, -i, -c, -u, -n and -A. +If -S flag is set, the output will be sorted by Subnet Manager. Since +the SM may have multiple ports, the output is classified by +<subnet manager name: port number>; please see the details in the +example below: + +############################################ +Logs by Subnet Manager +############################################ + +---------------------------------------------- +Report by subnet manager: 'c890f12ec07:port 2' +---------------------------------------------- +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5445]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 2|COND:#4 Disappearance +from fabric|NODE:IBM G1 Logical HCA :port 2:0x000255007002651f|DETAIL: +Node type: hca +May 5 08:23:55 c890f12ec07 local6:notice c890f12ec07 iview_sm[5128]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 2|COND:#3 Appearance in +fabric|NODE:IBM G1 Logical HCA :port 2:0x0002550070027f1f|DETAIL:Node +type: hca + +---------------------------------------------- +Report by subnet manager: 'c890f12ec07:port 1' +---------------------------------------------- + +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5442]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 1|COND:#4 Disappearance +from fabric|NODE:IBM G1 Logical HCA :port 1:0x000255007002650f|DETAIL: +Node type: hca +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5442]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 1|COND:#4 Disappearance +from fabric|NODE:IBM G1 Logical HCA :port 1:0x0002550070027f0f|DETAIL: +Node type: hca + + +If -i flag is set, the output will be sorted by IB node; it is +classified by <node_name: port number: GUID number>. Furthermore, +if the log entry includes the keyword "IBM *Logical", then we will +display the link relationships between the IBM G1 Logical HCA and +IBM G1 Logical Switch, and between the IBM G1 Logical Switch and the Qlogic Switch. 
We will +use the result file of getGuids script to get the node name +corresponding to HCA port number and GUIDs, and use the fabric link +files from Fabric Management Server(HSM/ESM) to get the Qlogic switch +connection information, so -i flag must be used with -g and -l flags. +If the log entry does not include the keyword "IBM *Logical", we +cannot get the corresponding nodename and connection relationship +for it, so this entry will be displayed directly, with the string +after "NODE:" used as the IB node. Please see the details in the examples below: + +############################################ +Logs by IB node +############################################ + +---------------------------------------------- +Reported by IB node: 'c890f11ec06.ppd.pok.ibm.com: port 1: GUID 0x0002550070027f0f' +- ehca0 <-> IBM G1 Logical Switch 1 port 17 (GUID 0x0002550070027f20) +- Connector is C65-T1 (HV=Cx-T1) +- IBM G1 Logical Switch 1 <-> SilverStorm 9024 DDR port 16 (GUID 0x00066a00d900042d) +---------------------------------------------- + +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5442]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 1|COND:#4 Disappearance +from fabric|NODE:IBM G1 Logical HCA :port 1:0x0002550070027f0f|DETAIL: +Node type: hca + +---------------------------------------------- +Reported by IB node: 'SilverStorm 9120 GUID=0x00066a0002000225 Leaf 8, Chip A:port 0:0x00066a0007001311' +---------------------------------------------- +Mar 25 16:21:54 c890f12ec07 iview_sm[3725]: c890f12ec07; MSG:NOTICE| +SM:c890f12ec07:port 1|COND:#4 Disappearance from fabric|NODE:SilverStorm +9120 GUID=0x00066a0002000225 Leaf 8, Chip A:port 0:0x00066a0007001311| +DETAIL:Node type: switch + + +If -c flag is set, the output will be sorted by chassis; it is classified +by <chassis name: model: GUID number>. Please see the details in the example +below: + +############################################################ +Logs by CHASSIS +############################################################ 
+ +---------------------------------------------- +Reported by chassis: 'SilverStorm: model 9120: GUID 0x00066a0002000225' +---------------------------------------------- + +Apr 23 09:16:56 qswitch slot101:9.114.80.179;MSG:WARNING|CHASSIS:SilverStorm +9120 GUID=0x00066a0002000225|COND:#17 FRU state changed from online +to offline|FRU:Power Supply 1|PN:200805-101 + + +If -u flag is set, the output will be sorted by FRU, please see the details in +the example below: + +############################################################ +Logs by FRUS from CHASSIS +############################################################ + +------------------------------------------------------------ +Associated with FRU: 'Power Supply 1' +------------------------------------------------------------ + +Apr 23 09:19:12 qswitch slot101:9.114.80.179;MSG:NOTICE|CHASSIS:SilverStorm +9120 GUID=0x00066a0002000225|COND:#18 FRU state changed from offline +to online|FRU:Power Supply 1|PN:200805-101 + + +If -n flag is set, the output will be sorted by a certain nodename, please see +the details in the example below: + +############################################################ +Logs for special nodes +############################################################ +------------------------------------------------------------ +Reported by node: 'c890f11ec06.ppd.pok.ibm.com' +------------------------------------------------------------ + +May 5 08:23:55 c890f12ec07 local6:notice c890f12ec07 iview_sm[5128]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 2|COND:#3 Appearance in fabric| +NODE:IBM G1 Logical HCA :port 2:0x0002550070027f1f|DETAIL:Node type: hca +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5442]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 1|COND:#4 Disappearance from +fabric|NODE:IBM G1 Logical HCA :port 1:0x0002550070027f0f|DETAIL:Node +type: hca + +------------------------------------------------------------ +Reported by node: 'c890f11ec05.ppd.pok.ibm.com' 
+------------------------------------------------------------ +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5445]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 2|COND:#4 Disappearance from +fabric|NODE:IBM G1 Logical HCA :port 2:0x000255007002651f|DETAIL:Node +type: hca + + +If -E flag is set with -n or -i flag, annotatelog script will use dsh to access +the IB nodes or node list specified by -n flag, and use "errpt -J ERRLOG_ON +ERRLOG_OFF" command to get the corresponding timestamps, and add these +timestamps to the annotatelog output. Please see the details below: + +---------------------------------------------- +Reported by IB node: 'c890f11ec06.ppd.pok.ibm.com: port 1: GUID 0x0002550070027f0f' +- ehca0 <-> IBM G1 Logical Switch 1 port 17 (GUID 0x0002550070027f20) +- Connector is C65-T1 (HV=Cx-T1) +- IBM G1 Logical Switch 1 <-> SilverStorm 9024 DDR port 16 (GUID 0x00066a00d900042d) +---------------------------------------------- +# c890f11ec06.ppd.pok.ibm.com +# ERRLOG_ON: 01/05/08 09:23 +# ERRLOG_OFF: 04/05/08 09:23 +---------------------------------------------- + +May 5 09:06:33 c890f12ec07 local6:notice c890f12ec07 iview_sm[5442]: +c890f12ec07; MSG:NOTICE|SM:c890f12ec07:port 1|COND:#4 Disappearance from +fabric|NODE:IBM G1 Logical HCA :port 1:0x0002550070027f0f|DETAIL:Node +type: hca + + +If -A flag is set, the output will be the combination of -i, -S, -c and -u. + +If a log entry cannot be parsed into any of the types above, it will be displayed +in "Logs by others". 
These logs are displayed only when the -A flag is used; +please see the details in the example below: + +############################################################ +Logs by others +############################################################ +May 14 11:44:22 qswitch slot101:9.114.80.179 FEtask[86f38fb8]: ESM: Embedded SM +Error: rmsg_recv: output buffer[2016] too small for incoming data[2035] : 0 +May 14 11:44:22 qswitch slot101:9.114.80.179 PM_task[86f08298]: ESM: Embedded SM +Error: DoSendFeAsync - message send failed : 128 diff --git a/xCAT-server/share/xcat/ib/scripts/configiba b/xCAT-server/share/xcat/ib/scripts/configiba new file mode 100644 index 000000000..f5033a6c7 --- /dev/null +++ b/xCAT-server/share/xcat/ib/scripts/configiba @@ -0,0 +1,88 @@ +#!/usr/bin/perl +# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html + +# Sample xCAT post script for configuring secondary adapter based on eth0 +# settings and some conventions. This script works for both diskful installs and diskless boots. 
+
+use strict;
+use warnings;
+use Socket;
+
+# Platform name reported by uname; it selects the Linux or the AIX path below.
+my $PLTFRM = `uname`;
+chomp $PLTFRM;
+
+# One pass per interface ib0..ib3; on AIX ib0/ib1 are ports 1/2 of iba0 and ib2/ib3 are ports 1/2 of iba1.
+my @nums = (0..3);
+foreach my $num (@nums) {
+    # Take primary node name, add -ib$num and then reverse resolve to get what ip should be
+    my $nic = "ib$num";
+    my $hostname = "$ENV{NODE}-$nic";
+    my $packed_ip = gethostbyname($hostname);
+    if (!$packed_ip) { logmsg("cannot resolve $hostname."); exit 1; }
+    my $ip = inet_ntoa($packed_ip);
+    #TODO: should contact xcatd on the service node to get the netmask and gateway from the networks table
+    # The gateway is X.X.255.254, so the mask has to be 255.255.0.0 for the gateway to be on the same subnet.
+    my $netmask = "255.255.0.0";
+    my ($first, $second, $rest) = split(/\./, $ip);
+    my $gateway = "$first.$second.255.254";
+
+    if ($PLTFRM eq "Linux") {
+        # Write the info to the ifcfg file
+        my $dir = "/etc/sysconfig/network-scripts";
+        my $cfg;
+        if (!open($cfg, '>', "$dir/ifcfg-$nic")) { logmsg("cannot open $dir/ifcfg-$nic."); exit 1; }
+        print $cfg "DEVICE=$nic\n";
+        print $cfg "BOOTPROTO=none\n";
+        print $cfg "IPADDR=$ip\n";
+        print $cfg "NETMASK=$netmask\n";
+        print $cfg "GATEWAY=$gateway\n";
+        print $cfg "ONBOOT=yes\n";
+        close $cfg;
+        # runcmd logs failures itself; report success only when ifup really succeeded.
+        if (runcmd("$dir/ifup $nic") == 0) { logmsg("successfully configured $nic."); }
+    } elsif ($PLTFRM eq "AIX") {
+        #Check whether the icm is available
+        my $cmd = "lsdev -C | grep icm | grep Available";
+        my $lsdev_out = `$cmd`;
+        if ($?) {
+            # icm is not Available: create it, or fall back to configuring an already defined icm.
+            # Test runcmd's return value, not $?: the logger call inside runcmd overwrites $?.
+            if (runcmd("mkdev -c management -s infiniband -t icm")) {
+                my $rc = runcmd("mkdev -l icm");
+                if ($rc) { exit $rc; }
+            }
+        }
+        #Configure the IB interfaces
+        my $iba_num = int($num / 2);
+        my $ib_adapter = "iba$iba_num";
+        my $port = ($num % 2 == 0) ? 1 : 2;
+        my $mkib_cmd = "mkiba -a $ip -i $nic -A $ib_adapter -p $port -P -1 -S up -m $netmask";
+        if (runcmd($mkib_cmd) == 0) { logmsg("successfully configured $nic."); }
+    }
+}
+exit 0;
+
+# Send "configiba: MESSAGE" to syslog with tag xcat. List-form system() bypasses the shell, so quotes in MESSAGE are safe.
+sub logmsg {
+    my ($msg) = @_;
+    system('logger', '-t', 'xcat', "configiba: $msg");
+    return;
+}
+
+# Run a shell command with stderr merged into stdout; a failure is logged together with the command output.
+# Returns 0 on success, otherwise the command's exit code (1 when it was killed by a signal or could not be started).
+sub runcmd {
+    my $cmd = shift @_;
+    $cmd .= ' 2>&1';
+    my @output = `$cmd`;
+    my $status = $?;    # save it now: every later system()/backtick call replaces $?
+    return 0 if $status == 0;
+    my $rc = $status >> 8;
+    $rc = 1 if $rc == 0 || $rc > 255;    # killed by a signal, or the shell could not be run ($? == -1)
+    logmsg("command $cmd failed with rc $rc: " . join('', @output));
+    return $rc;
+}
+
+
+
diff --git a/xCAT-server/share/xcat/ib/scripts/configiba.README b/xCAT-server/share/xcat/ib/scripts/configiba.README
new file mode 100644
index 000000000..4f5477000
--- /dev/null
+++ b/xCAT-server/share/xcat/ib/scripts/configiba.README
@@ -0,0 +1,68 @@
+#
+#
+# Licensed Materials - Property of IBM
+#
+# (C) COPYRIGHT International Business Machines Corp. 2008
+# All Rights Reserved
+#
+# US Government Users Restricted Rights - Use, duplication or
+# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
+#
+# IBM_PROLOG_END_TAG
+
+configiba.README
+
+This README describes how to use the configiba script.
+
+This configiba postscript is used to do secondary adapter configuration for InfiniBand interfaces (ibX) and ml0 on both AIX and Linux Managed Nodes. Please copy this script to postscripts directory before configuration, generally the folder is /install/postscripts.
+
+There are two times to configure IB adapters, during node installation or with command updatenode after node installation. These two IB configuration ways are similar and finally they will trigger this sample postscript to configure IB adapters.
+
+To use this script, users need to firstly setup DNS for the new adapters before node installation or before triggering command updatenode depending on when to start IB configuration, either time is supported:
+
+1.The IP address entries for IB interfaces in /etc/hosts on xCAT managed nodes should have the node short hostname and the unique IB interface name in them. The format should be node_short_hostname-ib_interfacename.
+For example:
+c890f11ec01 is the node short hostname, c890f11ec01-ib0, c890f11ec01-ib1, c890f11ec01-ib2, etc. are the IP names for the IB interfaces on c890f11ec01.
+ +2.Update networks table with IB sub-network +For example: +chtab net=172.16.0.0 networks.netname=ib0 networks.mask=255.255.0.0 networks.mgtifname=ib +Note: Attributes gateway, dhcpserver, tftpserver, and nameservers in networks table do not have to be assigned, since the xCAT management work is still running on ethernet. + +3.Add the entries in the /etc/hosts into DNS and restart the DNS +For Linux Managed Nodes: +makedns +service named restart +For AIX Managed Nodes: +makedns +stopsrc -s named +startsrc -s named +lssrc -s named +Note: Only if the state of named is active, we can continue to test DNS. + +4.Check if DNS for the IB network has been set up successfully +nslookup [node_short_hostname-ib_interfacename] + +5.Add this script to postscripts +chtab node=c890f11ec01 postscripts.postscripts=configiba +Note: please also include any other postscripts that you need. + +6.Now all the preparation work for IB configuration has been done, user can continue the general node installation or trigger updatenode command to configure IB adapters. + +As a sample postscript, the netmask is set to default value: 255.255.0.0 and gateway is set to "X.X.255.254". If the IB interface name is not a simple combination of short hostname and ibX or netmask and gateway cannot meet user's requirement, user can modify this script by himself, like the example below: + +The node short hostname is c890f11ec01-en, and the IB interface name is c890f11ec01-ib0, c890f11ec01-ib1, etc. The user needs to modify +my $hostname = "$ENV{NODE}-$nic"; +to +my $fullname = `echo $ENV{NODE} | cut -c 1-11`; +chomp($fullname); +my $hostname = "$fullname-$nic"; + +We assume every node has two IB adapters; if only one adapter is available on each node, please modify: +my @nums = (0..3); +to +my @nums = (0..1); + +Note: Currently, for diskless installation, there is no /etc/resolv.conf generated for the compute node. This prevents the compute node from resolving the name services on management node. 
User needs to use rcp to copy resolv.conf to compute node as a workaround. The format of resolv.conf is like: +domain ppd.pok.ibm.com +nameserver 172.16.0.1 diff --git a/xCAT-server/share/xcat/ib/scripts/getGuids b/xCAT-server/share/xcat/ib/scripts/getGuids new file mode 100644 index 000000000..fcaa077d6 --- /dev/null +++ b/xCAT-server/share/xcat/ib/scripts/getGuids @@ -0,0 +1,653 @@ +#!/usr/bin/perl +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# +# +# Licensed Materials - Property of IBM +# +# (C) COPYRIGHT International Business Machines Corp. 2008 +# All Rights Reserved +# +# US Government Users Restricted Rights - Use, duplication or +# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. +# +# IBM_PROLOG_END_TAG + + +########################################################################### +# # +# Command: getGuids # +# # +#-------------------------------------------------------------------------# +# This xCAT script will use dsh to get the Guids from Linux nodes # +# and AIX nodes, and save the results to file /var/opt/xcat/ib/ # +# Guids.xcat, log file is /var/log/xcat/getGuids.log. # +# Command Syntax: # +# getGuids [-h] [-f output_file] # +# -f output_file # +# Specifies a file full path name that is used to save the # +# GUIDs output. # +# -h # +# Display usage information. 
#
+# Exit codes:                                                             #
+#     0 - success                                                         #
+#     1 - fail                                                            #
+###########################################################################
+
+use strict;
+use Getopt::Long;
+
+# Log file, and the result file that is used when -f is not given
+$::GUIDS_LOG = "/var/log/xcat/getGuids.log";
+$::DEFAULT_RESULT_FILE = "/var/opt/xcat/ib/Guids.xcat";
+
+# variables and Commands
+$::OK = 0;
+$::NOK = 1;
+$::logging = 0;
+$::GLOBAL_EXIT = 0;
+$::NODEGRP = "/opt/xcat/bin/nodegrp";
+$::LinuxIBCmd = "/usr/bin/ibv_devinfo";
+$::AIXIBCmd = "/usr/bin/ibstat";
+
+# MAIN Main main#
+&getArgs;
+
+# Append logging information to getGuids.log; give up if it cannot be opened, because every later step prints to $::LOG_FILE_HANDLE
+if (&append_logging($::GUIDS_LOG) != $::OK) { $::GLOBAL_EXIT = $::NOK; exit; }
+$::logging++;
+
+local *FILE;
+unless (open(FILE, '>', $::RESULT_FILE))    # 3-arg open: the -f value is used as a plain file name only
+{
+    print "Can't open file $::RESULT_FILE for writing.\n";
+    print $::LOG_FILE_HANDLE
+      "Can't open file $::RESULT_FILE for writing.\n";
+
+    $::GLOBAL_EXIT = $::NOK;
+    exit;
+}
+
+# get Linux nodes
+my @LnxNodes = `$::NODEGRP LinuxNodes`;
+print $::LOG_FILE_HANDLE "Running command: $::NODEGRP LinuxNodes\n";
+chomp @LnxNodes;
+
+my @ReachableLnxNodes;
+my @UnreachableLnxNodes;
+my @ValidLnxNodes;
+my @BadLnxNodes;
+my $num = scalar(@LnxNodes);
+if ($num > 0)
+{
+    # Handle Linux Nodes
+    # Check if dsh is reachable
+    foreach my $node (@LnxNodes)
+    {
+        my $rc = &checkDshReachability($node);
+        if ($rc == 0)    # dsh is ok
+        {
+            push @ReachableLnxNodes, $node;
+        }
+        else
+        {
+            push @UnreachableLnxNodes, $node;
+        }
+    }
+
+    if (scalar (@UnreachableLnxNodes))
+    {
+        my $UnreachableLnxNodes = join (", ", @UnreachableLnxNodes);
+        print
+          "Warning: dsh is unreachable for the node(s): $UnreachableLnxNodes.\n" .
+          "Please use updatenode command to configure it.\n";
+        print $::LOG_FILE_HANDLE
+          "Warning: dsh is unreachable for the node(s): $UnreachableLnxNodes.\n" .
+          "Please use updatenode command to configure it.\n";
+    }
+
+    foreach my $node (@ReachableLnxNodes)
+    {
+        my $rc = &checkIBCmdAvailability($node, "Linux");
+        if ($rc == 0)
+        {
+            push @ValidLnxNodes, $node;
+        }
+        else
+        {
+            push @BadLnxNodes, $node;
+        }
+    }
+
+    if (scalar (@BadLnxNodes))
+    {
+        my $BadLnxNodes = join (", ", @BadLnxNodes);
+        print
+          "Warning: Command $::LinuxIBCmd is not available on the node(s): $BadLnxNodes.\nPlease ensure the libibverbs rpm is installed.\n";
+        print $::LOG_FILE_HANDLE
+          "Warning: Command $::LinuxIBCmd is not available on the node(s): $BadLnxNodes.\nPlease ensure the libibverbs rpm is installed.\n";
+    }
+
+    if (scalar (@ValidLnxNodes))
+    {
+        my $rc = &getLinuxGUIDS(\@ValidLnxNodes);
+        if ($rc)
+        {
+            $::GLOBAL_EXIT = $rc;
+            exit;
+        }
+    }
+}
+
+# get AIX nodes
+my @AIXNodes = `$::NODEGRP AIXNodes`;
+print $::LOG_FILE_HANDLE "Running command: $::NODEGRP AIXNodes\n";
+chomp @AIXNodes;
+
+my @ReachableAIXNodes;
+my @UnreachableAIXNodes;
+my @ValidAIXNodes;
+my @BadAIXNodes;
+$num = scalar(@AIXNodes);    # reuse $num instead of declaring it a second time
+if ($num > 0)
+{
+    # Handle AIX Nodes
+    # Check if dsh is reachable
+    foreach my $node (@AIXNodes)
+    {
+        my $rc = &checkDshReachability($node);
+        if ($rc == 0)    # dsh is ok
+        {
+            push @ReachableAIXNodes, $node;
+        }
+        else
+        {
+            push @UnreachableAIXNodes, $node;
+        }
+    }
+
+    if (scalar (@UnreachableAIXNodes))
+    {
+        my $UnreachableAIXNodes = join (", ", @UnreachableAIXNodes);
+        print
+          "Warning: The dsh is unreachable for the node(s): $UnreachableAIXNodes.\n" .
+          "Please use updatenode command to configure it.\n";
+        print $::LOG_FILE_HANDLE
+          "Warning: The dsh is unreachable for the node(s): $UnreachableAIXNodes.\n" .
+          "Please use updatenode command to configure it.\n";
+    }
+
+    foreach my $node (@ReachableAIXNodes)
+    {
+        my $rc = &checkIBCmdAvailability($node, "AIX");
+        if ($rc == 0)
+        {
+            push @ValidAIXNodes, $node;
+        }
+        else
+        {
+            push @BadAIXNodes, $node;
+        }
+    }
+
+    if (scalar (@BadAIXNodes))
+    {
+        my $BadAIXNodes = join (", ", @BadAIXNodes);
+        print
+          "Warning: Command $::AIXIBCmd is not available on the node(s): $BadAIXNodes.\nPlease ensure the devices.common.IBM.ib.rte fileset is installed.\n";
+        print $::LOG_FILE_HANDLE
+          "Warning: Command $::AIXIBCmd is not available on the node(s): $BadAIXNodes.\nPlease ensure the devices.common.IBM.ib.rte fileset is installed.\n";
+    }
+
+    if (scalar (@ValidAIXNodes))
+    {
+        my $rc = &getAIXGUIDS(\@ValidAIXNodes);
+        if ($rc)
+        {
+            $::GLOBAL_EXIT = $rc;
+            exit;
+        }
+    }
+}
+
+if ((scalar(@LnxNodes) + scalar(@AIXNodes)) <= 0)
+{
+    print "There is no nodes defined on this MN\n";
+    print $::LOG_FILE_HANDLE "There is no nodes defined on this MN\n";
+    $::GLOBAL_EXIT = $::NOK;
+    exit;
+}
+
+print "The GUIDs are saved to file $::RESULT_FILE.\n";
+
+# Finish up and exit; this END block runs on every exit path, including the early "exit;" calls above
+END
+{
+    close FILE;
+    if ($::logging)
+    {
+        &stop_logging();
+    }
+
+    #Determine exit code: a failure recorded in $::GLOBAL_EXIT must not be hidden by a smaller $?
+    if ($::GLOBAL_EXIT > $?)
+    {
+        $? = $::GLOBAL_EXIT;
+    }
+}
+
+exit;    # end of Main
+
+#--------------------------------------------------------------------------------
+
+=head3    getArgs
+
+        Parse -h and -f into $::HELP and $::RESULT_FILE; without -f, use the default result file and create its directory. Exits on -h or when the directory cannot be created.
+=cut
+
+#--------------------------------------------------------------------------------
+sub getArgs()
+{
+    GetOptions(
+               'h'   => \$::HELP,
+               'f=s' => \$::RESULT_FILE
+              );
+    if ($::HELP)
+    {
+        &usage;
+        $::GLOBAL_EXIT = $::OK;
+        exit;
+    }
+    if (!$::RESULT_FILE)
+    {
+        $::RESULT_FILE = $::DEFAULT_RESULT_FILE;
+        if (!-e "/var/opt/xcat/ib/")
+        {
+            `mkdir -p /var/opt/xcat/ib/`;    # -p: parent directories may not exist yet
+            if ($?)
+            {
+                $::GLOBAL_EXIT = $::NOK;    # not the raw $?: a wait status such as 256 would wrap to exit code 0
+                exit;
+            }
+        }
+    }
+}
+
+#--------------------------------------------------------------------------------
+
+=head3    usage
+
+        Print the getGuids command syntax to standard output.
+=cut
+
+#--------------------------------------------------------------------------------
+sub usage()
+{
+    print
+      "Usage: getGuids [-h] [-f output_file]
+        -f output_file
+            Specifies a file full path name that is used to save the GUIDs output.
+        -h
+            Display usage information.\n";
+}
+
+#--------------------------------------------------------------------------------
+
+=head3    checkDshReachability
+
+        Notes: Check the dsh reachability between the Management Nodes
+               and node.
+
+Arguments:
+        $node - the remote node hostname.
+
+Returns:
+        $::OK - The remote node is reachable through dsh.
+        $::NOK - The remote node is unreachable through dsh.
+
+=cut
+
+#--------------------------------------------------------------------------------
+sub checkDshReachability()
+{
+    my ($node) = @_;
+
+    my $output = `dsh -Q -n $node date 2>/dev/null`;
+    print $::LOG_FILE_HANDLE "Running command: dsh -Q -n $node date 2>/dev/null\n";
+    if ($? == $::OK)    # still the dsh status: print does not modify $?
+    {
+        return $::OK;
+    }
+    return $::NOK;
+}
+
+#--------------------------------------------------------------------------------
+
+=head3    checkIBCmdAvailability
+
+        Notes: Check availability of the IB command on the node.
+
+Arguments:
+        $node - the remote node hostname.
+        $os - the os type of the node
+
+Returns:
+        $::OK - The IB command is available on the node.
+        $::NOK - The IB command is not available on the node.
+
+=cut
+
+#--------------------------------------------------------------------------------
+sub checkIBCmdAvailability()
+{
+    my ($node, $os) = @_;
+    my $output;
+    if ($os eq "Linux") {
+        $output = `dsh -Q -n $node ls $::LinuxIBCmd 2>/dev/null`;
+        print $::LOG_FILE_HANDLE "Running command: dsh -Q -n $node ls $::LinuxIBCmd 2>/dev/null\n"
+    }
+    else {
+        $output = `dsh -Q -n $node ls $::AIXIBCmd 2>/dev/null`;
+        print $::LOG_FILE_HANDLE "Running command: dsh -Q -n $node ls $::AIXIBCmd 2>/dev/null\n"
+    }
+
+    #print "Here: " . $output;
+    if ($? == $::OK)    # still the dsh status: print does not modify $?
+    {
+        return $::OK;
+    }
+    return $::NOK;
+}
+
+#-------------------------------------------------------------------------------
+
+=head3    append_logging
+
+        Open the given log file (create it, or append if it exists), store the handle in $::LOG_FILE_HANDLE and write a start banner. Returns $::OK, or $::NOK if the file cannot be opened.
+
+=cut
+
+#-------------------------------------------------------------------------------
+sub append_logging()
+{
+    #my ($class, $logfile) = @_;
+    my ($logfile) = @_;
+    my ($cmd, $rc);
+
+    #
+    #  get log file ready
+    #
+    if (!-e $logfile)
+    {
+        #  create the log file if not already there
+        unless (open(LOGFILE, ">$logfile"))
+        {
+            # Cannot open file
+            print
+              "Can't open file \"$logfile\" for writing.\n";
+            return $::NOK;
+        }
+    }
+    else
+    {
+        # it's there so just append
+        unless (open(LOGFILE, ">>$logfile"))
+        {
+            print "Can't update file \"$logfile\".\n";
+            return $::NOK;
+        }
+    }
+
+    $::LOG_FILE_HANDLE = \*LOGFILE;
+
+    # Print the date to the top of the logfile
+    my $sdate = `/bin/date`;
+    chomp $sdate;
+    print "Output log is being written to \"$logfile\".\n";
+
+    print $::LOG_FILE_HANDLE
+      "---------------------------------------------------------------------\n";
+    print $::LOG_FILE_HANDLE "Logging started $sdate.\n";
+    print $::LOG_FILE_HANDLE
+      "---------------------------------------------------------------------\n";
+
+    return $::OK;
+}
+
+#-------------------------------------------------------------------------------
+
+=head3    stop_logging
+
+        Write a stop banner, close $::LOG_FILE_HANDLE and reset it to undef. Returns $::OK. Call it only after append_logging succeeded.
+
+=cut
+
+#-------------------------------------------------------------------------------
+sub stop_logging()
+{
+
+    # Print the date at the bottom of the logfile
+    my $sdate = `/bin/date`;
+    chomp $sdate;
+    print $::LOG_FILE_HANDLE
+      "---------------------------------------------------------------------\n";
+    print $::LOG_FILE_HANDLE "Logging stopped $sdate.\n";
+    print $::LOG_FILE_HANDLE
+      "---------------------------------------------------------------------\n";
+
+    close($::LOG_FILE_HANDLE);
+    $::LOG_FILE_HANDLE = undef;
+
+    return $::OK;
+}
+
+#-------------------------------------------------------------------------------
+
+=head3    getAIXGUIDS
+
+        Get GUIDs from AIX nodes with "ibstat -v" run through dsh, and write them to FILE and to the log. Returns $::OK, or $::NOK if the dsh command fails.
+Arguments:
+        $refAIXNodes - The reference to the group of AIX nodes.
+=cut
+
+#-------------------------------------------------------------------------------
+sub getAIXGUIDS()
+{
+    my ($refAIXNodes) = @_;
+    my $AIXNodes = join (",", @$refAIXNodes);
+
+    print "Getting GUIDs from AIX nodes...\n";
+    print $::LOG_FILE_HANDLE "Getting GUIDs from AIX nodes...\n";
+
+    my $getCmd = "dsh -n $AIXNodes $::AIXIBCmd -v 2>/dev/null";
+    print $::LOG_FILE_HANDLE "Running command: $getCmd.\n";
+    my @output = `$getCmd`;
+
+    if ($?)
+    {
+        print "Command failed: $getCmd.\n";
+        print $::LOG_FILE_HANDLE "Command failed: $getCmd.\n";
+        return $::NOK;
+    }
+
+    my $oldhost = "";
+    my $host = "";
+    my $dev = "";
+    my $guid = "";
+    my $port = "";
+    my $gid = "";
+    my $baseguid = "";
+    my $lsw0 = "";
+    my $lsw1 = "";
+
+    foreach my $line (@output)
+    {
+        chomp $line;
+
+        # Get node hostname: the text before the first "host:" style prefix of the line
+        if ($line =~ /(\S*):.*/)
+        {
+            $host = $1;
+            if ($host ne $oldhost)
+            {
+                print FILE "Node name is $host.\n";
+                print $::LOG_FILE_HANDLE "Node name is $host.\n";
+                $oldhost=$host;
+            }
+        }
+
+        # Get device name
+        if ($line =~ /.*IB NODE INFORMATION.*\((\S*)\).*/)
+        {
+            $dev = $1;
+        }
+
+        # Get device GUID; the dots are removed, then the last two hex digits are dropped (baseguid) or replaced by 80 / 81 (lsw0 / lsw1)
+        if ($line =~ /.*\(GUID\):.*\s+(\S*)/)
+        {
+            $guid = $1;
+            $guid=~s/\.//g;
+            $baseguid=$guid;
+            $baseguid=~s/..$//;
+            $lsw0=$guid;
+            $lsw0=~s/..$/80/;
+            $lsw1=$guid;
+            $lsw1=~s/..$/81/;
+
+            print FILE "$host: $dev: baseguid: $baseguid\n";
+            print FILE "$host: $dev: dev: $guid\n";
+            print FILE "$host: $dev: lsw0: $lsw0\n";
+            print FILE "$host: $dev: lsw1: $lsw1\n";
+
+            print $::LOG_FILE_HANDLE "$host: $dev: baseguid: $baseguid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: dev: $guid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: lsw0: $lsw0\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: lsw1: $lsw1\n";
+        }
+
+        # Get port number under device(iba)
+        if ($line =~ /\s*IB PORT (\S*) INFORMATION.*/)
+        {
+            $port = $1;
+        }
+
+        # Get GUID under port
+        if ($line =~ /.*GUID\[.*\s+(\S*)/)
+        {
+            $gid = $1;
+            $gid=~s/\.//g;
+            print FILE "$host: $dev: portGUID_$port: $gid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: portGUID_$port: $gid\n";
+        }
+
+    }
+    return $::OK;
+}
+
+#-------------------------------------------------------------------------------
+
+=head3    getLinuxGUIDS
+
+        Get GUIDs from Linux nodes with "ibv_devinfo -v" run through dsh, and write them to FILE and to the log. Returns $::OK, or $::NOK if the dsh command fails.
+Arguments:
+        $refLnxNodes - The reference to the group of Linux nodes.
+=cut
+
+#-------------------------------------------------------------------------------
+sub getLinuxGUIDS()
+{
+    my ($refLnxNodes) = @_;
+    my $LnxNodes = join (",", @$refLnxNodes);
+
+    print
+      "Getting GUIDs from Linux nodes...\n";
+    print $::LOG_FILE_HANDLE
+      "Getting GUIDs from Linux nodes...\n";
+
+    my $getCmd = "dsh -n $LnxNodes $::LinuxIBCmd -v 2>/dev/null";
+    print $::LOG_FILE_HANDLE "Running command: $getCmd.\n";
+    my @output = `$getCmd`;
+
+    if ($?)
+    {
+        print
+          "Command failed: $getCmd.\n";
+        print $::LOG_FILE_HANDLE
+          "Command failed: $getCmd.\n";
+        return $::NOK;
+    }
+
+    my $oldhost = "";
+    my $host ="";
+    my $dev = "";
+    my $guid = "";
+    my $port = "";
+    my $gid = "";
+    my $baseguid = "";
+    my $lsw0 = "" ;
+    my $lsw1 = "";
+
+    foreach my $line (@output)
+    {
+        chomp $line;
+        # Get node hostname: the text before the first "host:" style prefix of the line
+        if ($line =~ /(\S*):.*/)
+        {
+            $host = $1;
+            if ($host ne $oldhost)
+            {
+                print FILE "Node name is $host.\n";
+                print $::LOG_FILE_HANDLE "Node name is $host.\n";
+                $oldhost=$host;
+            }
+        }
+
+        # Get device name
+        if ($line =~ /.*hca_id:\s(\S*).*/)
+        {
+            $dev = $1;
+        }
+
+        # Get node_guid under hca_id; the colons are removed, then the last two hex digits are dropped (baseguid) or replaced by 80 / 81 (lsw0 / lsw1)
+        if ($line =~ /.*node_guid:\s*(\S*)/)
+        {
+            $guid = $1;
+            $guid =~s/://g;
+            $baseguid = $guid;
+            $baseguid =~s/..$//;
+            $lsw0 = $guid;
+            $lsw0 =~s/..$/80/;
+            $lsw1 = $guid;
+            $lsw1 =~s/..$/81/;
+
+            print FILE "$host: $dev: baseguid: $baseguid\n";
+            print FILE "$host: $dev: dev: $guid\n";
+            print FILE "$host: $dev: lsw0: $lsw0\n";
+            print FILE "$host: $dev: lsw1: $lsw1\n";
+
+            print $::LOG_FILE_HANDLE "$host: $dev: baseguid: $baseguid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: dev: $guid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: lsw0: $lsw0\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: lsw1: $lsw1\n";
+        }
+
+        # Get port number under hca_id
+        if ($line =~ /port:\s*(\S*).*/)
+        {
+            $port = $1;
+        }
+
+        # Get GID under port. The greedy ".*" leaves only the last three colon-separated groups in $1; the first 4 hex digits are then taken from baseguid (assumes port and node GUID share them - TODO confirm)
+        if ($line =~ /.*GID.* *(\S*:\S*:\S*:\S*).*/)
+        {
+            $gid = $1;
+            $gid=~s/://g;
+            my $prefix = substr $baseguid, 0, 4;
+            $gid = $prefix . $gid;
+            print FILE "$host: $dev: portGUID_$port: $gid\n";
+            print $::LOG_FILE_HANDLE "$host: $dev: portGUID_$port: $gid\n";
+        }
+    }
+    return $::OK;
+}
diff --git a/xCAT-server/share/xcat/ib/scripts/getGuids.README b/xCAT-server/share/xcat/ib/scripts/getGuids.README
new file mode 100644
index 000000000..386f93640
--- /dev/null
+++ b/xCAT-server/share/xcat/ib/scripts/getGuids.README
@@ -0,0 +1,69 @@
+# IBM_PROLOG_BEGIN_TAG
+# This is an automatically generated prolog.
+#
+#
+#
+# Licensed Materials - Property of IBM
+#
+# (C) COPYRIGHT International Business Machines Corp. 2008
+# All Rights Reserved
+#
+# US Government Users Restricted Rights - Use, duplication or
+# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
+#
+# IBM_PROLOG_END_TAG
+
+getGuids.README
+
+This README describes how to use the getGuids script.
+
+The syntax of the getGuids command is:
+
+getGuids [-h] [-f output_file]
+    -f output_file
+        Specifies a file full path name that is used to save the
+        GUIDs output.
+    -h
+        Display usage information.
+
+The getGuids is a sample script to get GUIDs for Infiniband Galaxy HCAs
+(Host Channel Adapter) and their ports from xCAT Management Nodes. It
+needs to be run from xCAT Management Node, and will use dsh to all
+the xCAT Managed Nodes so that it could use ibstat command on AIX system
+or ibv_devinfo command on Linux system to get the information about the
+IB devices.
+
+If dsh is unreachable for certain nodes, getGuids will give a warning
+message to the user and recommend configuring dsh for these nodes. If
+ibstat or ibv_devinfo command is not available on the target nodes,
+getGuids will give a warning message to indicate that could not get
+GUIDs on these nodes.
+
+The output of this script will be printed to the screen and also saved
+into output file, which could be specified by -f flag; otherwise, xCAT
+will use the default output file /var/opt/xcat/ib/Guids.xcat.
+ +The format for the output file is "hostname: device: attribute: value", for example: +-------------------------------------------------- +c890f11ec05.ppd.pok.ibm.com: ehca0: baseguid: 00025500700265 +c890f11ec05.ppd.pok.ibm.com: ehca0: dev: 0002550070026500 +c890f11ec05.ppd.pok.ibm.com: ehca0: lsw0: 0002550070026580 +c890f11ec05.ppd.pok.ibm.com: ehca0: lsw1: 0002550070026581 +c890f11ec05.ppd.pok.ibm.com: ehca0: portGUID_1: 000255007002650f +c890f11ec05.ppd.pok.ibm.com: ehca0: portGUID_2: 000255007002651f +-------------------------------------------------- + +This output file will be used by annotatelog script to organize the log +entries gathered onto the xCAT MN through remote logging. This script is +not necessary for non-Galaxy adapters because users can give the adapter +names directly and do not need the GUID map. + +getGuids will get IB GUIDs from all the AIX nodes and Linux nodes that +are defined on the xCAT Management Node (MN); for the nodes that are +not defined on xCAT MN, if user wants to get GUIDs for these nodes too, +please define them on the xCAT Management Node first and configure dsh before +using getGuids script. + +The log file for the script getGuids is /var/log/xcat/getGuids.log. +The supported hardware types of the nodes gathered GUIDs from are POWER6 +HV4/HV4+, POWER6 HV8/HV8+ and POWER6 IH. 
diff --git a/xCAT-server/xCAT-server.spec b/xCAT-server/xCAT-server.spec index 6a280eee4..df6f968ef 100644 --- a/xCAT-server/xCAT-server.spec +++ b/xCAT-server/xCAT-server.spec @@ -42,6 +42,7 @@ mkdir -p $RPM_BUILD_ROOT/%{prefix}/share/xcat/scripts mkdir -p $RPM_BUILD_ROOT/%{prefix}/share/xcat/tools mkdir -p $RPM_BUILD_ROOT/%{prefix}/share/xcat/cons mkdir -p $RPM_BUILD_ROOT/%{prefix}/share/xcat/rollupdate +mkdir -p $RPM_BUILD_ROOT/%{prefix}/share/xcat/ib/scripts mkdir -p $RPM_BUILD_ROOT/%{prefix}/lib/perl/xCAT_plugin mkdir -p $RPM_BUILD_ROOT/opt/xcat/xdsh/Context mkdir -p $RPM_BUILD_ROOT/%{prefix}/lib/perl/xCAT_monitoring/samples @@ -80,6 +81,7 @@ cp share/xcat/scripts/* $RPM_BUILD_ROOT/%{prefix}/share/xcat/scripts cp share/xcat/tools/* $RPM_BUILD_ROOT/%{prefix}/share/xcat/tools cp share/xcat/rollupdate/* $RPM_BUILD_ROOT/%{prefix}/share/xcat/rollupdate cp share/xcat/cons/* $RPM_BUILD_ROOT/%{prefix}/share/xcat/cons +cp share/xcat/ib/scripts/* $RPM_BUILD_ROOT/%{prefix}/share/xcat/ib/scripts chmod 755 $RPM_BUILD_ROOT/%{prefix}/share/xcat/cons/* ln -sf /%{prefix}/share/xcat/cons/hmc $RPM_BUILD_ROOT/%{prefix}/share/xcat/cons/ivm