Additional flow control error handling (defect 3819)

This commit is contained in:
lissav 2013-10-17 11:43:56 -04:00
parent cad8c86a6b
commit 46c9d5d788
3 changed files with 226 additions and 72 deletions

View File

@ -1,6 +1,14 @@
#!/bin/sh
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
#(C)IBM Corp
# This script adds xCAT-specific setup to the /etc/ssh/sshd_config and /etc/ssh/ssh_config files.
# It calls getcredentials.awk to get the ssh host keys from the /etc/xcat/hostkeys
# directory on the xcatmaster and puts the following keys in /etc/ssh on the node:
#   ssh_host_dsa_key, ssh_host_rsa_key
# It calls getcredentials.awk to get the root ssh keys from the ~/.ssh directory
# on the xcatmaster and puts the following keys in ~/.ssh on the node,
# if site.enablesshbetweennodes is yes:
#   id_rsa
#
# if on the Management Node, exit
if [ -e /etc/xCATMN ]; then
@ -47,7 +55,7 @@ then
fi
fi
if [ -r /etc/ssh/sshd_config ]
if [ -r /etc/ssh/ssh_config ]
then
sed -i '/StrictHostKeyChecking /'d /etc/ssh/ssh_config
echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
@ -65,11 +73,6 @@ then
chmod 600 /root/.ssh/*
fi
#if [ -d /xcatpost/hostkeys ]
#then
# logger -t xcat "Install: using server provided host key for convenience."
# cp /xcatpost/hostkeys/*_key /etc/ssh/
#fi
if [ ! -x /usr/bin/openssl ]; then
logger -t xcat -p local4.err "Install: /usr/bin/openssl is not executable."
exit 0
@ -78,27 +81,31 @@ allowcred.awk &
CREDPID=$!
sleep 1
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest received response return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_dsa_hostkey | grep -E -v '</{0,1}xcatresponse>|</{0,1}serverdone>' | sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /tmp/ssh_dsa_hostkey
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
#check whether the message is an error or not
grep -E '<error>' /tmp/ssh_dsa_hostkey
if [ $? -ne 0 ]; then
#the message received is the data
cat /tmp/ssh_dsa_hostkey | grep -E -v '</{0,1}data>|</{0,1}content>|</{0,1}desc>' >/etc/ssh/ssh_host_dsa_key
logger -t xCAT -p local4.info ssh_dsa_hostkey
logger -t xCAT -p local4.info "remoteshell: getting ssh_host_dsa_key"
MAX_RETRIES=10
RETRY=0
MYCONT=`cat /etc/ssh/ssh_host_dsa_key`
while [ -z "$MYCONT" ]; do
MYCONT=`cat /etc/ssh/ssh_host_dsa_key`
while [ -z "$MYCONT" ]; do
# not using flow control, need to sleep
if [ $useflowcontrol = "0" ]; then
let SLI=$RANDOM%10
@ -113,23 +120,26 @@ if [ $? -ne 0 ]; then
fi
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_dsa_hostkey | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/ssh/ssh_host_dsa_key
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
MYCONT=`cat /etc/ssh/ssh_host_dsa_key`
done
chmod 600 /etc/ssh/ssh_host_dsa_key
if ! grep "PRIVATE KEY" /etc/ssh/ssh_host_dsa_key > /dev/null 2>&1 ; then
rm /etc/ssh/ssh_host_dsa_key
done
chmod 600 /etc/ssh/ssh_host_dsa_key
if ! grep "PRIVATE KEY" /etc/ssh/ssh_host_dsa_key > /dev/null 2>&1 ; then
rm /etc/ssh/ssh_host_dsa_key
else
ssh-keygen -y -f /etc/ssh/ssh_host_dsa_key > /etc/ssh/ssh_host_dsa_key.pub
chmod 644 /etc/ssh/ssh_host_dsa_key.pub
chown root /etc/ssh/ssh_host_dsa_key.pub
fi
fi
else
#the message received is an error, so parse it
ERR_MSG=`sed -n 's%.*<error>\(.*\)</error>.*%\1%p' /tmp/ssh_dsa_hostkey`
@ -137,17 +147,19 @@ else
fi
rm /tmp/ssh_dsa_hostkey
# first contact daemon xcatflowrequest <server> 3001
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_rsa_hostkey | grep -E -v '</{0,1}xcatresponse>|</{0,1}serverdone>' | sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /tmp/ssh_rsa_hostkey
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
#check whether the message is an error or not
grep -E '<error>' /tmp/ssh_rsa_hostkey
if [ $? -ne 0 ]; then
@ -169,17 +181,19 @@ if [ $? -ne 0 ]; then
then
break
fi
# first contact daemon xcatflowrequest <server> 3001
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_rsa_hostkey | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/ssh/ssh_host_rsa_key
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
MYCONT=`cat /etc/ssh/ssh_host_rsa_key`
done
chmod 600 /etc/ssh/ssh_host_rsa_key
@ -206,20 +220,21 @@ fi
umask 0077
mkdir -p /root/.ssh/
sleep 1
if [ $ENABLESSHBETWEENNODES = "YES" ];
if [ $ENABLESSHBETWEENNODES = "YES" ]; # want nodes to be able to ssh to each other without password
then
#first contact daemon xcatflowrequest <server> 3001
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_root_key | grep -E -v '</{0,1}xcatresponse>|</{0,1}serverdone>'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /tmp/ssh_root_key
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
#check whether the message is an error or not
grep -E '<error>' /tmp/ssh_root_key
if [ $? -ne 0 ]; then
@ -240,16 +255,18 @@ then
then
break
fi
# first contact daemon xcatflowrequest <server> 3001
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "remoteshell: sending /$xcatpost/xcatflowrequest $master 3001"
logger -t xCAT -p local4.info "remoteshell: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "remoteshell:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "remoteshell: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk ssh_root_key | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /root/.ssh/id_rsa
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "remoteshell: received response /$xcatpost/xcatflowrequest $master 3001"
fi
MYCONT=`cat /root/.ssh/id_rsa`
done
else

View File

@ -497,12 +497,15 @@ if [ ! -x /$xcatpost/mypostscript ]; then
# first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatdsklspost:sending xcatflowrequest $SIP 3001"
/$xcatpost/xcatflowrequest $SIP 3001
rc=$?
logger -t xCAT -p local4.info "xcatdsklspost:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatdsklspost: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
/$xcatpost/getpostscript.awk | egrep '<data>' | sed -e 's/<[^>]*>//g'|egrep -v '^ *$'|sed -e 's/^ *//' > /$xcatpost/mypostscript;
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "xcatdsklspost: received response /$xcatpost/xcatflowrequest $master 3001"
fi
MYCONT=`grep MASTER /$xcatpost/mypostscript`
MAX_RETRIES=10
@ -525,11 +528,14 @@ if [ ! -x /$xcatpost/mypostscript ]; then
# contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatdsklspost: sending xcatflowrequest $SIP 3001"
/$xcatpost/xcatflowrequest $SIP 3001
rc=$?
logger -t xCAT -p local4.info "xcatdsklspost:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatdsklspost: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
/$xcatpost/getpostscript.awk | sed -e 's/<[^>]*>//g'|egrep -v '^ *$'|sed -e 's/^ *//' > /$xcatpost/mypostscript;
if [ $useflowcontrol = "1" ]; then
logger -t xCAT -p local4.info "xcatdsklspost: received response /$xcatpost/xcatflowrequest $master 3001"
fi
MYCONT=`grep MASTER /$xcatpost/mypostscript`
if [ ! -z "$MYCONT" ]; then
break;

View File

@ -29,26 +29,157 @@ if [ ! -x /usr/bin/openssl ]; then
fi
USEOPENSSLFORXCAT=1
export USEOPENSSLFORXCAT
allowcred.awk &
CREDPID=$!
sleep 1
mkdir -p /etc/xcat/cert
getcredentials.awk xcat_server_cred | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cert/server-cred.pem
# are we using xcat flow control
useflowcontrol=0
if [ $USEFLOWCONTROL = "YES" ] || [ $USEFLOWCONTROL = "yes" ] || [ $USEFLOWCONTROL = "1" ]; then
useflowcontrol=1
fi
xcatpost="xcatpost"
master=$MASTER
allowcred.awk &
CREDPID=$!
sleep 1
# setup and get the xCAT SSL credentials down to the service node
# create SSL certificate directory and then get them
mkdir -p /etc/xcat/cert
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatserver: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "xcatserver:xcatflowrequest received response return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatserver: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk xcat_server_cred | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cert/server-cred.pem
#check whether the message is an error or not
grep -E '<error>' /etc/xcat/cert/server-cred.pem
if [ $? -ne 0 ]; then
#the message received is the data
logger -t xCAT -p local4.info "xcatserver: getting server-cred.pem"
MAX_RETRIES=10
RETRY=0
MYCONT=`cat /etc/xcat/cert/server-cred.pem`
while [ -z "$MYCONT" ]; do
# not using flow control, need to sleep
if [ $useflowcontrol = "0" ]; then
let SLI=$RANDOM%10
let SLI=SLI+10
sleep $SLI
fi
RETRY=$(($RETRY+1))
if [ $RETRY -eq $MAX_RETRIES ]
then
break
fi
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatserver: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "xcatserver:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatserver: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk xcat_server_cred | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cert/server-cred.pem
MYCONT=`cat /etc/xcat/cert/server-cred.pem`
done
chmod 600 /etc/xcat/cert/*
getcredentials.awk xcat_cfgloc | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cfgloc
# if not DB2
grep "DB2" /etc/xcat/cfgloc 2>&1 1> /dev/null
if [ $? -ne 0 ]; then
sed s/host=[^\|]*/host=$MASTER/ /etc/xcat/cfgloc > /etc/xcat/cfgloc.new
mv /etc/xcat/cfgloc.new /etc/xcat/cfgloc
else
if [ -n "$UPDATENODE" ] && [ $UPDATENODE -eq 1 ]; then
cp /etc/xcat/cfgloc /etc/xcat/cfgloc.db2
else
mv /etc/xcat/cfgloc /etc/xcat/cfgloc.db2
fi
fi
chmod 600 /etc/xcat/cfgloc*
# do not assume working directory, use the full path
cp /xcatpost/_xcat/ca.pem /etc/xcat/cert/ca.pem
kill -9 $CREDPID
else # error from first getcredential call
#the message received is an error from credentials.pm, so parse it
ERR_MSG=`sed -n 's%.*<error>\(.*\)</error>.*%\1%p' /etc/xcat/cert/server-cred.pem`
logger -t xCAT -p local4.err xcatserver: $ERR_MSG
rm /etc/xcat/cert/server-cred.pem
fi
# get the xcat cfgloc file
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatserver: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "xcatserver:xcatflowrequest received response return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatserver: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk xcat_cfgloc | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cfgloc
#check whether the message is an error or not
grep -E '<error>' /etc/xcat/cfgloc
if [ $? -ne 0 ]; then
#the message received is the data
logger -t xCAT -p local4.info "xcatserver: getting cfgloc"
MAX_RETRIES=10
RETRY=0
MYCONT=`cat /etc/xcat/cfgloc`
while [ -z "$MYCONT" ]; do
# not using flow control, need to sleep
if [ $useflowcontrol = "0" ]; then
let SLI=$RANDOM%10
let SLI=SLI+10
sleep $SLI
fi
RETRY=$(($RETRY+1))
if [ $RETRY -eq $MAX_RETRIES ]
then
break
fi
if [ $useflowcontrol = "1" ]; then
#first contact daemon xcatflowrequest <server> 3001
logger -t xCAT -p local4.info "xcatserver: sending xcatflowrequest $master 3001"
/$xcatpost/xcatflowrequest $master 3001
rc=$?
logger -t xCAT -p local4.info "xcatserver:xcatflowrequest return=$rc"
if [ $rc -ne 0 ]; then
logger -t xCAT -p local4.info "xcatserver: error from xcatflowrequest, will not use flow control"
useflowcontrol=0
fi
fi
getcredentials.awk xcat_cfgloc | grep -v '<'|sed -e 's/&lt;/</' -e 's/&gt;/>/' -e 's/&amp;/&/' -e 's/&quot/"/' -e "s/&apos;/'/" > /etc/xcat/cfgloc
MYCONT=`cat /etc/xcat/cfgloc`
done
# if we successfully got cfgloc
if [ -f /etc/xcat/cfgloc ]; then
# if not DB2
grep "DB2" /etc/xcat/cfgloc 2>&1 1> /dev/null
if [ $? -ne 0 ]; then
sed s/host=[^\|]*/host=$MASTER/ /etc/xcat/cfgloc > /etc/xcat/cfgloc.new
mv /etc/xcat/cfgloc.new /etc/xcat/cfgloc
else # DB2 cfgloc has different format
if [ -n "$UPDATENODE" ] && [ $UPDATENODE -eq 1 ]; then
cp /etc/xcat/cfgloc /etc/xcat/cfgloc.db2
else
mv /etc/xcat/cfgloc /etc/xcat/cfgloc.db2
fi
fi
chmod 600 /etc/xcat/cfgloc*
fi
else # error from first getcredentials call
#the message received is an error from credentials.pm, so parse it
ERR_MSG=`sed -n 's%.*<error>\(.*\)</error>.*%\1%p' /etc/xcat/cfgloc`
logger -t xCAT -p local4.err xcatserver: $ERR_MSG
rm /etc/xcat/cfgloc
fi
kill -9 $CREDPID