add new ping-all script for HPC
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/branches/2.7@13595 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
parent
f2744659a7
commit
b2c1c84696
44
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/README.AIX
Normal file
44
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/README.AIX
Normal file
@ -0,0 +1,44 @@
|
||||
Run as root on the EMS.
|
||||
|
||||
This test uses xdsh with "lpar" as the node group. Use a different node group if desired.
|
||||
|
||||
Note: xdsh can leave processes behind if you press <Ctrl-C> in the middle of a command.
|
||||
To guard against results that are corrupted by stale copies of the ping test, kill
|
||||
any leftover ping processes.
|
||||
|
||||
|
||||
1) Create the ping_address_file. This is only required if changes have been made since the last
|
||||
run; e.g., octants deconfigured, ARP entries changed, etc.
|
||||
|
||||
"./create_ping_address_file [interface]"
|
||||
|
||||
If the "interface" argument is not provided, it defaults to "ml0" as the interface to test.
|
||||
|
||||
Remove any nodes that you do not want to include in the ping test.
|
||||
|
||||
2) Copy "ping_address_file" to /tmp on each node:
|
||||
|
||||
xdcp lpar -v ping_address_file /tmp
|
||||
|
||||
3) Copy "ping_all" to /tmp on each node:
|
||||
|
||||
xdcp lpar -v ping_all /tmp
|
||||
|
||||
4) Remove any output files from previous runs:
|
||||
|
||||
xdsh lpar -v "rm /tmp/ping.all.out 2>/dev/null"
|
||||
|
||||
5) Run the ping test:
|
||||
|
||||
"./start_ping_all"
|
||||
|
||||
Each node in the test will write any ping failures to "/tmp/ping.all.out" (local /tmp on each node).
|
||||
(No "ping.all.out" file indicates no ping failures.)
|
||||
|
||||
6) To determine if we have ping failures:
|
||||
|
||||
xdsh lpar -v "ls -l /tmp/ping.all.out 2>/dev/null" | sort -n +5
|
||||
|
||||
If a given set of "ping.all.out" files is the same length, it's likely they will have
|
||||
duplicate ping failures.
|
||||
|
10
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/create_ping_address_file
Executable file
10
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/create_ping_address_file
Executable file
@ -0,0 +1,10 @@
|
||||
#!/bin/ksh
|
||||
# Usage: create_ping_address_file [interface]
#
# Asks every node in the "lpar" group (via xdsh) for the inet address(es) of
# the given interface and records the sorted replies in ./ping_address_file
# (also echoed to stdout by tee).
# The interface defaults to ml0 when no (or an empty) argument is given.
iface=${1:-ml0}

# The interface name is expanded here, on the management node, before the
# command string is shipped to the nodes.
remote_cmd="/usr/sbin/ifconfig $iface | grep inet | cut -d':' -f2 | cut -d' ' -f2 | head -2 "

xdsh lpar -v "$remote_cmd" | sort -n | tee ping_address_file
|
||||
|
||||
|
@ -0,0 +1,4 @@
|
||||
c250f08c01ap01: 30.8.1.1
|
||||
c250f08c01ap05: 30.8.1.5
|
||||
c250f08c01ap09: 30.8.1.9
|
||||
c250f08c01ap13: 30.8.1.13
|
88
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/ping_all
Executable file
88
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/ping_all
Executable file
@ -0,0 +1,88 @@
|
||||
#!/bin/ksh
|
||||
|
||||
# do_pings - ping (ping -f -c 10) every non-local address listed in "$file"
# and log the destinations that do not answer.
#
# Globals read:
#   file - path of the address list. Lines starting with "#" are ignored;
#          on every other line the first field is the destination host name
#          and the second field is the address to ping.
# Globals written (scratch values, not declared local):
#   dest_hostname, switch_dest, extra_fields, dest_failures[],
#   total_fail_rate, dest_fail_rate
# Output:
#   Appends to /tmp/ping.all.out (local to the node), for each destination
#   whose first ping run reports "100% packet loss":
#     - the "100% packet loss" line of a second ping run, if that run fails too
#     - one "failure to ..." summary line
function do_pings {
    #set -x

    integer dest_array_index=0
    integer total_ping_count=0
    integer ping_loop_count=0
    integer tmp_fail_int=0
    integer total_fails=0

    ping_loop_count=ping_loop_count+1

    grep -v "^#" "$file" |
    while read -r dest_hostname switch_dest extra_fields ; do
        # Blank or host-only lines carry no address to ping.
        [[ -n $switch_dest ]] || continue

        # Skip addresses configured on this node. The address is compared
        # against whole netstat fields as a plain string, so that e.g.
        # 0.8.1.1 is not mistaken for the local address 30.8.1.1 and the
        # dots are not treated as regex wildcards.
        if netstat -in |
            awk -v addr="$switch_dest" '
                { for (i = 1; i <= NF; i++) if (($i "") == (addr "")) found = 1 }
                END { exit !found }'
        then
            continue
        fi

        dest_array_index=dest_array_index+1
        total_ping_count=total_ping_count+1

        #echo `hostname` pinging $switch_dest
        if ping -f -c 10 "$switch_dest" | grep -q "100% packet loss" ; then
            total_fails=total_fails+1

            #hack for now to try to clear bad ml0 routes
            #echo "`hostname -s`:removing $switch_dest from ml route table to attempt route refresh"
            /usr/sbin/mltdd_dump -d "$switch_dest"

            # Ping once more after the route refresh; only a repeated total
            # loss is recorded by this line. To see it on stdout instead,
            # drop the redirection.
            ping -f -c 10 "$switch_dest" | grep "100% packet loss" >> /tmp/ping.all.out

            # Per-destination failure counter, kept as a number rather than
            # as a growing "+1+1..." string.
            dest_failures[$dest_array_index]=$(( ${dest_failures[dest_array_index]:-0} + 1 ))
            tmp_fail_int=${dest_failures[dest_array_index]}

            # Percentages are computed with bc and formatted with two decimals.
            total_fail_rate=$(print "$total_fails/$total_ping_count*100" | bc -l | awk ' { printf(" %4.2f\n", $0 ) } ' )
            dest_fail_rate=$(print "$tmp_fail_int/$ping_loop_count*100" | bc -l | awk ' { printf(" %4.2f\n", $0 ) } ')

            # To see the summary on stdout instead, drop the redirection.
            print "failure to $dest_hostname $switch_dest total: $total_fail_rate cur: $dest_array_index $dest_fail_rate num_pings: $ping_loop_count" >> /tmp/ping.all.out
        fi
    done
}
|
||||
|
||||
#main
#
# Usage: ping_all [address_file]
#   address_file defaults to "ping_address_file" in the current directory.
#
# Finds this node's own entry in the address file (a line ending in
# " <local address>"), writes a copy of the file rotated so that this entry
# comes first to /tmp/current_ping_address_file, and runs do_pings on it.
# Exit status: 0 after a ping run, 1 if the address file is unreadable or
# none of the local addresses is listed in it.
unset found_flag
rm /tmp/ping.all.out 2>/dev/null    # discard results from a previous run

file=${1:-ping_address_file}

if [[ ! -r $file ]]; then
    print -u2 "Unable to read address file: $file"
    exit 1
fi

# NOTE: found_flag and file are assigned inside the loop and read after it;
# this relies on ksh running the last stage of a pipeline in the current shell.
ifconfig -a | grep 'inet ' | awk ' { print $2 } ' | grep -v 127.0.0.1 |
while read -r my_address ; do
    ##print "checking $my_address"
    # Line number of the first entry for this address; empty when not listed.
    match_line=$(grep -n " ${my_address}\$" "$file" | awk -F ":" ' { print $1; exit } ')
    if [[ -n $match_line ]]; then
        ##print "Found my address: $my_address"
        integer starting_line=$match_line
        integer starting_line_minus_1=$starting_line-1

        # Rotate the list: own entry through end of file first, then the
        # lines that preceded it (none when the own entry is line 1).
        tail -n +"$starting_line" "$file" > /tmp/current_ping_address_file
        if (( starting_line_minus_1 > 0 )); then
            head -n "$starting_line_minus_1" "$file" >> /tmp/current_ping_address_file
        fi

        file=/tmp/current_ping_address_file
        do_pings
        found_flag=1
        break
    fi
done

if [[ -z $found_flag ]]; then
    print "Unable to find my entry in the $file for host: $(hostname -s)"
    exit 1
fi

rm /tmp/current_ping_address_file 2>/dev/null
exit 0
|
4
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/start_ping_all
Executable file
4
xCAT-IBMhpc/share/xcat/IBMhpc/ping-all/start_ping_all
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/ksh
|
||||
|
||||
# Launch the ping test on every node of the "lpar" group. Each node runs the
# copy of ping_all in /tmp against /tmp/ping_address_file; "&&" keeps ping_all
# from being started in some other directory should the cd fail.
xdsh lpar -v "cd /tmp && ./ping_all ./ping_address_file"
|
||||
|
Loading…
Reference in New Issue
Block a user