add new ping-all script for HPC

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/branches/2.7@13595 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
lissav 2012-08-23 18:40:17 +00:00
parent f2744659a7
commit b2c1c84696
5 changed files with 150 additions and 0 deletions

View File

@ -0,0 +1,44 @@
Run as root on the EMS.
This test uses xdsh and uses "lpar" for the node group. Use a different node group if desired.
Note that xdsh can leave processes behind if you press <CTRL-C> in the middle of a command.
To guard against results being corrupted by old copies of the ping test, kill
any leftover ping processes before starting.
1) Create the ping_address_file. This is only required if changes have been made since the last
run; e.g., octants deconfigured, ARP entries changed, etc.
"./create_ping_address_file [interface]"
If the "interface" argument is not provided, it defaults to "ml0" as the interface to test.
Remove from the file the entries for any nodes you don't want included in the ping test.
2) Copy "ping_address_file" to /tmp on each node:
xdcp lpar -v ping_address_file /tmp
3) Copy "ping_all" to /tmp on each node:
xdcp lpar -v ping_all /tmp
4) Remove any output files from previous runs:
xdsh lpar -v "rm /tmp/ping.all.out 2>/dev/null"
5) Run the ping test:
"./start_ping_all"
Each node in the test will write any ping failures to "/tmp/ping.all.out" (local /tmp on each node).
(No "ping.all.out" file indicates no ping failures.)
6) To determine if we have ping failures:
xdsh lpar -v "ls -l /tmp/ping.all.out 2>/dev/null" | sort -n +5
If several "ping.all.out" files are the same length, they likely
contain the same ping failures.

View File

@ -0,0 +1,10 @@
#!/bin/ksh
#
# create_ping_address_file - collect the address of one network interface
# from every node in the "lpar" node group and save the list locally.
#
# Usage: ./create_ping_address_file [interface]
#   interface  name of the interface to query; "ml0" when omitted or empty.
#
# Output: the sorted list is shown on stdout and also written to
#         "ping_address_file" in the current directory.

# Fall back to ml0 when no interface name was given.
interface=${1:-ml0}

# The quoted command runs on the nodes through xdsh; sorting and saving the
# collected lines happens on the machine running this script.
xdsh lpar -v "/usr/sbin/ifconfig $interface | grep inet | cut -d':' -f2 | cut -d' ' -f2 | head -2 " |
    sort -n |
    tee ping_address_file

View File

@ -0,0 +1,4 @@
c250f08c01ap01: 30.8.1.1
c250f08c01ap05: 30.8.1.5
c250f08c01ap09: 30.8.1.9
c250f08c01ap13: 30.8.1.13

View File

@ -0,0 +1,88 @@
#!/bin/ksh
#######################################
# do_pings - ping every remote address listed in the address file and log
# the destinations that report 100% packet loss.
#
# Globals:
#   file           (read)    path of the address file.  Each line is expected
#                            to hold "<hostname> <address>" in its first two
#                            whitespace-separated fields; lines starting with
#                            "#" are ignored.
#   dest_failures  (written) failure counter per destination, indexed by the
#                            destination's position among the pinged entries.
# Arguments: none
# Outputs:
#   Appends failure records to /tmp/ping.all.out.  Anything printed by
#   /usr/sbin/mltdd_dump goes to stdout.
# Notes:
#   - Written for ksh: relies on "integer", "print" and on the last stage of
#     a pipeline running in the current shell.
#   - Addresses that belong to this host (according to "netstat -in") are
#     not pinged.
#######################################
function do_pings {
    #set -x
    typeset out_file=/tmp/ping.all.out
    typeset dest_hostname switch_dest extra
    typeset total_fail_rate dest_fail_rate
    integer dest_array_index=0
    integer total_ping_count=0
    # The address list is walked exactly once per call.
    integer ping_loop_count=1
    integer tmp_fail_int=0
    integer total_fails=0

    grep -v "^#" "$file" |
    while read -r dest_hostname switch_dest extra ; do
        # Blank or malformed lines carry no address: nothing to ping.
        [[ -z $switch_dest ]] && continue

        # Skip our own addresses.  Compare whole netstat fields so that
        # e.g. 30.8.1.1 is not mistaken for 130.8.1.1 and "." is not
        # treated as a regex wildcard.
        if netstat -in | awk -v addr="$switch_dest" '
                { for (i = 1; i <= NF; i++) if ($i == addr) found = 1 }
                END { exit !found }' ; then
            continue
        fi

        (( dest_array_index = dest_array_index + 1 ))
        (( total_ping_count = total_ping_count + 1 ))

        # Only destinations with total packet loss are reported.
        ping -f -c 10 "$switch_dest" | grep -q "100% packet loss" || continue
        (( total_fails = total_fails + 1 ))

        #hack for now to try to clear bad ml0 routes
        /usr/sbin/mltdd_dump -d "$switch_dest"

        # Ping again after the route refresh attempt; the loss line is
        # recorded only if the destination is still unreachable.
        ##### To stop writing to the output file, comment out the ">>" lines
        ping -f -c 10 "$switch_dest" | grep "100% packet loss" >> "$out_file"

        # Keep the per-destination counter numeric (an unset slot counts as 0).
        (( tmp_fail_int = ${dest_failures[$dest_array_index]:-0} + 1 ))
        dest_failures[$dest_array_index]=$tmp_fail_int

        total_fail_rate=$(print "$total_fails/$total_ping_count*100" | bc -l | awk ' { printf(" %4.2f\n", $0 ) } ' )
        dest_fail_rate=$(print "$tmp_fail_int/$ping_loop_count*100" | bc -l | awk ' { printf(" %4.2f\n", $0 ) } ')

        print "failure to $dest_hostname $switch_dest total: $total_fail_rate cur: $dest_array_index $dest_fail_rate num_pings: $ping_loop_count" >> "$out_file"
    done
}
#main
#
# Usage: ping_all [address_file]
#   address_file  list of "<hostname> <address>" entries; defaults to
#                 "ping_address_file" in the current directory.
#
# Finds this host's own entry in the address file, builds a copy of the file
# rotated so that it starts at that entry, and hands the copy to do_pings
# through the global "file".
#
# Exit status: 0 when the host's entry was found and do_pings ran,
#              1 when the address file is unreadable or holds no entry for
#              any of this host's addresses.
#
# Relies on ksh running the last stage of a pipeline in the current shell,
# so "file" and "found_flag" set inside the loop are visible after it.
unset found_flag
rm -f /tmp/ping.all.out

file=${1:-ping_address_file}
if [[ ! -r $file ]] ; then
    print "Unable to read address file: $file"
    exit 1
fi

# Per-process name so concurrent or leftover runs do not clobber each other;
# removed on every exit path.
rotated_file=/tmp/current_ping_address_file.$$
trap 'rm -f "$rotated_file"' EXIT
trap 'exit 1' HUP INT TERM

integer starting_line=0
integer lines_before=0

ifconfig -a | awk '/inet / { print $2 }' | grep -v 127.0.0.1 |
while read -r my_address ; do
    ##print "checking $my_address"
    # Line number of the first entry whose address field is exactly ours.
    # Stopping at the first hit keeps duplicate entries from yielding
    # several line numbers.
    match_line=$(awk -v addr="$my_address" '$2 == addr { print NR; exit }' "$file")
    [[ -z $match_line ]] && continue

    ##print "Found my address: $my_address"
    starting_line=$match_line
    (( lines_before = starting_line - 1 ))

    # Rotate the list: our entry and everything after it first, then the
    # entries that preceded it.
    tail -n +"$starting_line" "$file" > "$rotated_file"
    if (( lines_before > 0 )) ; then
        head -n "$lines_before" "$file" >> "$rotated_file"
    fi

    file=$rotated_file
    do_pings
    found_flag=1
    break
done

if [[ -z $found_flag ]] ; then
    print "Unable to find my entry in the $file for host: $(hostname -s)"
    exit 1
fi
exit 0

View File

@ -0,0 +1,4 @@
#!/bin/ksh
#
# start_ping_all - run the ping test on every node in the "lpar" node group.
#
# Expects "ping_all" and "ping_address_file" to be present in /tmp on each
# node already.
#
# "&&" keeps ./ping_all from being started out of some other directory if
# the cd to /tmp fails on a node.
xdsh lpar -v "cd /tmp && ./ping_all ./ping_address_file"