#!/usr/bin/perl
# IBM(c) 2008 EPL license http://www.eclipse.org/legal/epl-v10.html
###########################################################################
#                                                                         #
# Command: annotatelog                                                    #
#                                                                         #
#-------------------------------------------------------------------------#
#   This xCAT script is used to parse the QLogic log entries in file
#   /var/log/xcat/errorlog/[xCAT Management Nodes] on CSM Management
#   Node by subnet manager, IB node, chassis, FRU (Field-Replaceable Unit)
#   or a particular node. This script is supported by both AIX and Linux MN.
#   From xCAT's point of view, the log to analyze must be an xCAT
#   consolidated log, which means this log file must come from the xCAT
#   syslog/errorlog monitoring mechanism, such as the
#   /var/log/xcat/errorlog/[xCAT Management Nodes] file. Since log formats
#   vary, xCAT does not support other log files.
#
# Command Syntax:
#   annotatelog -f log_file [-s start_time] [-e end_time]
#       { [-i -g guid_file -l link_file] [-S] [-c] [-u] | [-A -g guid_file -l link_file] }
#       { [-n node_list -g guid_file] [-E] }
#       [-h]
#
#   -A  Output the combination of -i, -S, -c and -u. It should be used
#       with -g and -l flags.
#   -f log_file
#       Specifies a log file fullpath name to analyze.
#       Must be xCAT consolidated log got from Qlogic HSM or ESM.
#   -s start_time
#       Specifies the start time for analysis, where the start_time
#       variable has the format ddmmyyhh:mm:ss (day, month, year,
#       hour, minute, and second), 00:00:00 is valid.
#   -e end_time
#       Specifies the end time for analysis, where the end_time
#       variable has the format ddmmyyhh:mm:ss (day, month, year,
#       hour, minute, and second), 00:00:00 is valid.
#   -l link_file
#       Specifies a link file fullpath name, which concatenates all
#       '/var/opt/iba/analysis/baseline/fabric*links' files from all
#       fabric management nodes.
#   -g guid_file
#       Specifies a guid file fullpath name, which has a list of
#       GUIDs as obtained from the "getGuids" script.
#   -E  Annotate with node ERRLOG_ON and ERRLOG_OFF information. This
#       can help determine if a disappearance was caused by a node
#       disappearing. It is for AIX nodes only and should be used with
#       the -n, -i or -A flag.
#   -S  Sort the log entries by subnet manager only.
#   -i  Sort the log entries by IB node only.
#   -c  Sort the log entries by chassis only.
#   -u  Sort the log entries by FRU only.
#   -n node_list
#       Specifies a comma-separated list of node host names, IP addresses
#       to look up in log entries.
#   -h  Display usage information.
#
# Exit codes:
#   0 - success
#   1 - fail
###########################################################################

use Net::Config;
use Getopt::Long;
use Time::Local;

# getHost() relies on inet_aton(), inet_ntoa() and AF_INET, which are
# exported by Socket.
use Socket;

# Maps between HCA adapter (device name + logical switch) and connector
%::HCAMAP = (
    "iba0_lsw1"  => "C65-T1 (HV=Cx-T1)",
    "iba0_lsw2"  => "C65-T2 (HV=Cx-T2)",
    "iba1_lsw1"  => "C65-T3",
    "iba1_lsw2"  => "C65-T4",
    "iba2_lsw1"  => "C66-T1",
    "iba2_lsw2"  => "C66-T2",
    "iba3_lsw1"  => "C66-T3",
    "iba3_lsw2"  => "C66-T4",
    "ehca0_lsw1" => "C65-T1 (HV=Cx-T1)",
    "ehca0_lsw2" => "C65-T2 (HV=Cx-T2)",
    "ehca1_lsw1" => "C65-T3",
    "ehca1_lsw2" => "C65-T4",
    "ehca2_lsw1" => "C66-T1",
    "ehca2_lsw2" => "C66-T2",
    "ehca3_lsw1" => "C66-T3",
    "ehca3_lsw2" => "C66-T4",
);

# Exit / return codes used throughout the script
$::OK  = 0;
$::NOK = 1;

# Separators used when printing the report
$::SHSEP = "###############################################################################";
$::DASEP = "-------------------------------------------------------------------------------";

# Logs sorted by the different grouping methods.
# Each value is a newline-joined string of raw log lines.
%::SMLOGS         = ();
%::FRULOGS        = ();
%::CHASSISLOGS    = ();
%::NODELOGS       = ();
%::NODELOGSBYNAME = ();
%::OTHERLOGS      = ();

# The start time and end time of logs that will be analyzed
# (both are assigned by getArgs()).
$::Start_Epoch = undef;
$::End_Epoch   = undef;

# Record the relationship between a guid and its information
%::GUIDS = ();

# Record links between HCA and Switch and links between Switch and Switch
%::HCALinks = ();

# ERRLOG_ON and ERRLOG_OFF information
%::ELOGON  = ();
%::ELOGOFF = ();

# Information related with a node
%::NodeInfo = ();

# List of nodes whose logs will be analyzed
@::NodeList = ();

# MAIN Main main#
getArgs();
getLogs($::LOG_FILE);
outputLogs();
exit $::OK;
#-------------------------------------------------------------------------------- =head3 getArgs Parse the command line and check the values Notes: =cut #-------------------------------------------------------------------------------- sub getArgs() { $Getopt::Long::ignorecase = 0; if ( !GetOptions( 'h' => \$::HELP, 'A' => \$::ALL, 'f=s' => \$::LOG_FILE, 's=s' => \$::START_TIME, 'e=s' => \$::END_TIME, 'g=s' => \$::GUIDS_FILE, 'l=s' => \$::LINKS_FILE, 'E' => \$::ERRLOG, 'S' => \$::SM, 'i' => \$::IBNODE, 'c' => \$::CHASSIS, 'u' => \$::FRU, 'n=s' => \$::NODE_LIST ) ) { usage(); exit $::NOK; } if ( $::HELP == 1 ) { usage(); exit $::OK; } if ( $::LOG_FILE eq "" ) { print "Please use \"-f\" to specify a log file."; usage(); exit $::NOK; } if ( $::SM == 0 && $::IBNODE == 0 && $::CHASSIS == 0 && $::FRU == 0 && $::NODE_LIST eq "") { #If there is no flag specified, -A is default $::ALL = 1; } if ( $::ALL == 1 && ($::SM == 1 || $::IBNODE == 1 || $::CHASSIS == 1 || $::FRU == 1)) { print "The flag -i, -S, -c or -u should not be used with -A."; usage(); exit $::NOK; } if ( ($::ALL == 1 || $::IBNODE == 1) && ( $::GUIDS_FILE eq "" || $::LINKS_FILE eq "")) { print "The -g flag and -l flag should be used with -a or -i flag.\n"; usage(); exit $::NOK; } if ( $::ERRLOG == 1 && ( $::IBNODE == 0 && $::ALL == 0 && $::NODE_LIST eq "") ) { print "The -E flag should be used with -n or -i or -a flag.\n"; usage(); exit $::NOK; } if ( $::NODE_LIST ne "" && $::GUIDS_FILE eq "" ) { print "The -n flag should be used with -g flag.\n"; usage(); exit $::NOK; } if ( $::NODE_LIST ne "") { @::NodeList = split /,/, $::NODE_LIST; #List all nodes in Managed or MinManaged mode my @AllNodes = `nodels`; chomp @AllNodes; my @NotDefinedNodes; foreach my $node (@::NodeList) { #If there are IP addresses in NodeList, convert IP to hostname first if (isIpaddr($node)) { my ($hostname, $ip) = getHost($node); $node = $hostname; } #Check nodes in NodeList and find those that are not defined in CSM database or not in 
Managed or MinManaged mode if (!(grep {$_ =~ /$node/} @AllNodes)) { push (@NotDefinedNodes, $node); next; } } if (scalar(@NotDefinedNodes)) { my $badnodes = join ",", @NotDefinedNodes; print "Could not find Managed Node(s) $badnodes in the node database. Run lsnode -w \"Mode='Managed' or Mode='MinManaged'\" to see the list of valid node names.\n"; exit $::NOK; } } if ( $::GUIDS_FILE ne "" ) { %::GUIDS = getGUIDs($::GUIDS_FILE); } if ( $::LINKS_FILE ne "" ) { %::HCALinks = getHCALinks($::LINKS_FILE); } if ( $::START_TIME ne "" ) { $::Start_Epoch = epochTime($::START_TIME); } else { $::Start_Epoch = 0; } if ( $::END_TIME ne "" ) { $::End_Epoch = epochTime($::END_TIME); } else { undef $::End_Epoch; } } #------------------------------------------------------------------------------- =head3 isIpaddr returns 1 if parameter is has a valid IP address form. Arguments: dot qulaified IP address: e.g. 1.2.3.4 Returns: 1 - if legal IP address 0 - if not legal IP address. Globals: none Error: none Example: if ($ipAddr) { blah; } Comments: Doesn't test if the IP address is on the network, just tests its form. =cut #------------------------------------------------------------------------------- sub isIpaddr { my ($class, $addr) = @_; #print "addr=$addr\n"; if ($addr !~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) { return 0; } if ($1 > 255 || $1 == 0 || $2 > 255 || $3 > 255 || $4 > 255) { return 0; } else { return 1; } } #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- =head3 getHost Return primary hostname and ip address for the given hostname or ip address and die if hostname resolution fails. Arguments: A string: either an Ip Address or a HostName. 
Returns: ($nodename, $nodeip) as strings Error: returns 0; Example: my ($nodename, $nodeip) = getHost($node); Comments: none =cut #------------------------------------------------------------------------------- sub getHost { my ($class, $arg, $live) = @_; #print "arg=$arg\n"; my ($hostname, $ipaddr); if (isIpaddr($arg)) { $ipaddr = $arg; my $packedaddr = inet_aton($ipaddr); $hostname = gethostbyaddr($packedaddr,AF_INET); if (!$hostname) { $hostname = $ipaddr; } #print "hostname=$hostname\n"; } else # they specified a hostname { $hostname = $arg; # this may be a short hostname my ($name, $aliases, $addrtype, $length, @addrs) = gethostbyname($hostname); #print "name=$name, # of addrs=$#addrs.\n"; my $packedaddr = $addrs[0]; $ipaddr = inet_ntoa($packedaddr); $hostname = $name; # they may have specified a shorter or non-primary name #print "ipaddr=$ipaddr, hostname=$hostname\n"; } return ($hostname, $ipaddr); } #-------------------------------------------------------------------------------- =head3 getLogs Parse the logs Notes: Arguments: $logfile - The file name of log file Returns: =cut #-------------------------------------------------------------------------------- sub getLogs { my ($logfile) = @_; my $tsepoch; unless (open(LOGFILE, $logfile)) { print "Can't open '$logfile':\n\t$!"; exit $::NOK; } foreach my $line () { chomp $line; $tsepoch = 0; # only work with local6 logs #if ( $line =~ /local6/ ) { # parse up log entries if ($line =~ /SM:.*COND:#12/ and ($::ALL == 1 || $::SM == 1)) { my ($ts, $level, $sm, $cond, $node, $linkedto, $detail); #Parse a line of logs #The DETAIL is not always there if ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)\|DETAIL:(.*)/ ) { ($ts, $level, $sm, $cond, $node, $linkedto, $detail) = ($1, $2, $3, $4, $5, $6, $7); } elsif ($line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|DETAIL:(.*)/) { ($ts, $level, $sm, $cond, $node, $detail) = ($1, $2, $3, $4, $5, $6); } $tsepoch = 
epochTime($ts) if($ts); #Check the time of this log is in the period specified if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) { $::SMLOGS{$sm} = "$::SMLOGS{$sm}\n$line"; } next; } if ($line =~ /CHASSIS:/ and ($::ALL == 1 || $::CHASSIS == 1 || $::FRU == 1)) { my ($ts, $level, $chassis, $cond, $FRU, $pn, $details); if ( $line =~ /(.........:..:..).*MSG:(\w*)\|CHASSIS:(.*)\|COND:(.*)\|FRU:(.*)\|PN:(.*)\|DETAIL:(.*)/ ) { ($ts, $level, $chassis, $cond, $FRU, $pn, $details) = ($1, $2, $3, $4, $5, $6, $7); } elsif ($line =~ /(.........:..:..).*MSG:(\w*)\|CHASSIS:(.*)\|COND:(.*)\|FRU:(.*)\|PN:(.*)/) { ($ts, $level, $chassis, $cond, $FRU, $pn) = ($1, $2, $3, $4, $5, $6); } $tsepoch = epochTime($ts) if($ts); if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) { $FRU = "$chassis - $FRU"; $::FRULOGS{$FRU} = "$::FRULOGS{$FRU}\n$line" if ($::ALL == 1 || $::FRU == 1); $::CHASSISLOGS{$chassis} = "$::CHASSISLOGS{$chassis}\n$line" if ($::ALL == 1 || $::CHASSIS == 1); } next; } if ($line =~ /SM:/ && ($::ALL == 1 || $::SM == 1 || $::IBNODE == 1 || $::NODE_LIST ne "")) { my $guid; my $port; my $guidref; my $realguid; my $lsw; my $lswref; my $hcaphys; my $host; my $elog_entry; my ($ts, $level, $sm, $cond, $node, $linkedto, $detail); if ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)\|DETAIL:(.*)/) { ($ts, $level, $sm, $cond, $node, $linkedto, $detail) = ($1, $2, $3, $4, $5, $6, $7); } elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|LINKEDTO:(.*)/ ) { ($ts, $level, $sm, $cond, $node, $linkedto) = ($1, $2, $3, $4, $5, $6); } elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)\|DETAIL:(.*)/ ) { ($ts, $level, $sm, $cond, $node, $detail) = ($1, $2, $3, $4, $5, $6); } elsif ( $line =~ /(.........:..:..).*MSG:(\w*)\|SM:(.*)\|COND:(.*)\|NODE:(.*)/ ) { ($ts, $level, $sm, $cond, $node) = ($1, $2, $3, $4, 
$5); } if ( $sm eq "") { $sm = "Other"; } $tsepoch = epochTime($ts) if($ts); if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($ts && $tsepoch <= $::End_Epoch)) ) { if ($node =~ /IBM .*Logical/ && ($::ALL == 1 || $::IBNODE == 1 || $::NODE_LIST ne "")) { $guid = $node; $guid =~ s/.*:0x//; $port = $node; $port =~ s/.*port //; $port =~ s/:.*//; if ( $node =~ /IBM .*Logical Switch/ ) { #It's for Logical Switch $realguid = $guid; $guid =~ s/..$/00/; $node =~ /.*Switch (\d):.*/; $lsw = $1; #The key for HCALinks to find the next link of this Logical Switch $guidref = "$realguid:1"; } else { #It's for Logical HCA $::GUIDS{$guid}{type} =~ /.*(\d+)/; $lsw = $1; #The key for HCALinks to find the linked Logical Switch of this Logical HCA $guidref = "$guid:$port"; $lswref = $::HCALinks{$guidref}; $lswref =~ /.*GUID\s*0x(\w+).*/; $lswref = $1; #The key for HCALinks to find the next link of the Logical Switch $lswref = "$lswref:1"; } my $hcadev = $::GUIDS{$guid}{dev}; my $hcalu = "${hcadev}_lsw${lsw}"; #Find the connector $hcaphys = $::HCAMAP{$hcalu}; $host = $::GUIDS{$guid}{host}; #If this it a log for HCA, the guid can be translated to a hostname using GUIDS map $node = "$host :port $port:0x$guid" if($host); if ( $::NODE_LIST ne "" ) { if ( grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList ) { $::NODELOGSBYNAME{$host} = "$::NODELOGSBYNAME{$host}\n$line"; } } #If -E is specified, get the ERRLOG_ON and ERRLOG_OFF information if ( $::ERRLOG == 1 && $host && ($::ALL == 1 || $::IBNODE == 1 || ($::NODE_LIST ne "" && grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList))) { if ( ! exists $::ELOGON{$host}) { my $ret = getERRLOG($host); ($::ELOGON{$host}, $::ELOGOFF{$host}) = split/[_]/, $ret; } $elog_entry = "\n# ERRLOG_ON: $::ELOGON{$host}\n# ERRLOG_OFF: $::ELOGOFF{$host}"; } if ($::ALL == 1 || $::IBNODE == 1) { $::HCALinks{$guidref} =~ /(.+)\s*port.*/; my $swname = $1; my $lswn = $::HCALinks{$guidref} ? $::HCALinks{$guidref} : "Not Found"; my $swn = $::HCALinks{$lswref} ? 
$::HCALinks{$lswref} : "Not Found"; if ( (! exists $::NodeInfo{$node}) && $node) { if ( $node =~ /IBM .*Logical Switch/ ) { $::NodeInfo{$node} = "- <-> $lswn"; } else { $::NodeInfo{$node} = "- $::GUIDS{$guid}{dev} <-> $lswn\n" . "- Connector is $hcaphys\n" . "- $swname<-> $swn"; if ( ($::ALL == 1 || $::IBNODE == 1) && $::ERRLOG == 1 ) { $::NodeInfo{$node} = $::NodeInfo{$node} . "\n$::DASEP\n# $host$elog_entry"; } } } } if ($::ERRLOG == 1 && $host && $::NODE_LIST ne "" && (grep {$host =~ /$_/ || $_ =~ /$host/} @::NodeList)) { $::NodeInfo{$host} = "$elog_entry"; } } $::SMLOGS{$sm} = "$::SMLOGS{$sm}\n$line" if ($::ALL == 1 || $::SM == 1); $::NODELOGS{$node} = "$::NODELOGS{$node}\n$line" if ($::ALL == 1 || $::IBNODE == 1); } next; } if ($line =~ /.*/ and ($::ALL == 1)) { $line =~ /(.........:..:..).*/; my $tsepoch = epochTime($1) if($1); if ( $tsepoch >= $::Start_Epoch && (!defined($::End_Epoch) || ($1 && $tsepoch <= $::End_Epoch)) ) { $::OTHERLOGS{"others"} = "$::OTHERLOGS{\"others\"}\n$line"; } } #} } close LOGFILE; return; } #-------------------------------------------------------------------------------- =head3 outputLogs Output the results of logs analysis Notes: =cut #-------------------------------------------------------------------------------- sub outputLogs { my $SMLOGHDR = "Logs by Subnet Manager"; my $SMLOGSECT = "Reported by subnet manager: "; my $IBNODEHDR = "Logs by IB node"; my $IBNODESECT = "Reported by IB node: "; my $IBNODEHDRBYNAME = "Logs for special nodes"; my $CHLOGHDR = "Logs by CHASSIS"; my $CHLOGSECT = "Reported by chassis: "; my $FRULOGHDR = "Logs by FRUS from CHASSIS"; my $FRULOGSECT = "Associated with FRU: "; my $OTLOGHDR = "Logs by Others"; my $OTLOGSECT = ""; print < 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11, ); if ( length($timestring) > 1 ) { #If the time string is not of ddmmyyhh:mm:ss format, it will be a time stamp of a log if ( $timestring 
=~ /\S+\s+\d+\s+\d+:\d+:\d+/ ) { my $mont = substr $timestring, 0, 3; my $dayt = substr $timestring, 4, 2; my $time = substr $timestring, 7; if (exists $themon{$mont}) { $mont = $themon{$mont} + 1; } my ($psec, $pmin, $phour, $pmday, $pmon, $pyear, $pwday, $pyday, $pisdst) = localtime(); $pyear = $pyear + 1900; my $year = substr $pyear, 2, 2; if ($dayt < 10) { $dayt =~ s/ /0/g; } if ($mont < 10) { $mont = "0$mont"; } #Format the timestring to ddmmyyhh:mm:ss $timestring = "$dayt$mont$year$time"; } } if ( ! ($timestring =~ /^\d+:\d+:\d+$/) ) { print "Invalid time '$ots'\n"; exit $::NOK; } my $day = substr $timestring, 0, 2; if ( !($day =~ /^\d+$/) || $day < 1 || $day > 31 ) { $formaterror = 1; } my $mon = substr $timestring, 2, 2; if ( !($mon =~ /^\d+$/) || $mon < 1 || $mon > 12 ) { $formaterror = 1; } $mon = $mon - 1; my $year = substr $timestring, 4, 2; if ( !($year =~ /^\d+$/) ) { $formaterror = 1; } $year = $year - 1900; if ( $year < 0 ) { $year = $year + 2000 } if ( $year > 138 ) { $formaterror = 1; } my $hour = substr $timestring, 6, 2; if ( !($hour =~ /^\d+$/) || $hour < 0 || $hour > 23 ) { $formaterror = 1; } my $min = substr $timestring, 9, 2; if ( !($min =~ /^\d+$/) || $min < 0 || $min > 59 ) { $formaterror = 1; } my $sec = substr $timestring, 12, 2; if ( !($sec =~ /^\d+$/) || $sec < 0 || $sec > 59 ) { $formaterror = 1; } if($formaterror) { print "Invalid time '$ots'\n"; exit $::NOK; } $epoch = timegm($sec, $min, $hour, $day, $mon, $year); return ($epoch); } #-------------------------------------------------------------------------------- =head3 usage usage for annotatelog =cut #-------------------------------------------------------------------------------- sub usage { print <) { chomp $line; if ($line =~ /(\S*): (\S*): (\S*): (\w*$)/) { my ($host, $dev, $type, $guid) = ($1, $2, $3, $4); $guids{$guid}{host} = $host; $guids{$guid}{dev} = $dev; $guids{$guid}{type} = $type; } } close GUIDSFILE; return %guids; } 
#--------------------------------------------------------------------------------

=head3    getHCALinks

    Get links between HCA and Switch and links between Switch and Switch

    Notes:
        The file is scanned line by line. A line matching
        /g.*IBM.*Logical/ that carries a "g 0x<guid> <port>" field arms the
        parser and remembers "<guid>:<port>" as the current key. If the
        very next line carries a "0x<guid> <port> <type> <name>" field, it
        is recorded as the peer of that key. A line that does not match
        /g.*IBM.*Logical/ always disarms the parser.
    Arguments:
        $hcalfile - A link file fullpath name, which concatenates all
                    '/var/opt/iba/analysis/baseline/fabric*links' files
                    from all fabric management nodes.
    Returns:
        %hcalinks - A hash table recording links. Keys have the form
                    "<guid>:<port>" (guid without the 0x prefix); values
                    have the form "<name> port <port> (GUID 0x<guid>)".
    Error:
        Prints a message and exits with $::NOK if the file cannot be opened.

=cut

#--------------------------------------------------------------------------------
sub getHCALinks
{
    my ($hcalfile) = @_;

    my $getit   = 0;     # 1 while the previous line was a "Logical" endpoint
    my $hcaport = "";    # "<guid>:<port>" key taken from that endpoint line
    my %hcalinks;

    my $links_fh;
    unless (open($links_fh, '<', $hcalfile))
    {
        print "Can't open HCA Links file '$hcalfile':\n\t$!";
        exit $::NOK;
    }

    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <$links_fh>)
    {
        if ($getit == 1)
        {
            # Peer end of the link whose first end was seen on the previous line
            if ($line =~ /.*0x(\S*)\s+(\d+)\s+(\w+)\s+(.*)/)
            {
                my ($guid, $port, $name) = ($1, $2, $4);
                $hcalinks{$hcaport} = "$name port $port (GUID 0x$guid)";
                $getit = 0;
            }
        }

        if ($line =~ /g.*IBM.*Logical/)
        {
            # First end of a link: remember its guid and port as the key.
            # If the detailed pattern does not match, the parser state is
            # deliberately left untouched.
            if ($line =~ /.*g 0x(\S*) *(\d*) .*/)
            {
                $hcaport = "$1:$2";
                $getit   = 1;
            }
        }
        else
        {
            $getit = 0;
        }
    }
    close $links_fh;

    # NOTE: a leftover debug loop that walked the keys of the global
    # %::HCALinks while indexing the local %hcalinks was removed here; it
    # printed nothing on the first call and stale keys on any later call.

    return %hcalinks;
}

#--------------------------------------------------------------------------------

=head3    checkDshReachability

    Notes:
        Check the dsh reachability between the Management Nodes and node
        by running "date" on the node through dsh.
    Arguments:
        $node - the remote node hostname.
    Returns:
        The wait status ($?) of the dsh command: $::OK (0) when the
        command succeeded, non-zero otherwise.
        $::NOK when $node is empty or contains characters other than
        word characters, '.', ':' and '-' (dsh is not run in that case).

=cut

#--------------------------------------------------------------------------------
sub checkDshReachability
{
    my ($node) = @_;

    # The node name is interpolated into a shell command line, so refuse
    # anything that does not look like a host name or an IP address.
    if (!defined($node) || $node !~ /\A[\w.:-]+\z/)
    {
        return $::NOK;
    }

    # Only the exit status matters; the command output is discarded.
    my $output = `dsh -Q -n $node date 2>/dev/null`;
    return $?;
}