From d4a1e82d5d399df66eeb7abd8c69171d294913b6 Mon Sep 17 00:00:00 2001 From: Yuan Bai Date: Tue, 24 Oct 2017 10:21:32 +0800 Subject: [PATCH] enhance HA scripts deactivate-mn and activate-mn (#4120) * enhance doc for scripts changes * enhance activate-mn/deactivate-mn --- .../setup_ha_mgmt_node_with_shared_data.rst | 34 ++++- xCAT-server/share/xcat/hamn/activate-mn | 137 +++++++++++++++--- xCAT-server/share/xcat/hamn/deactivate-mn | 82 ++++++++--- 3 files changed, 205 insertions(+), 48 deletions(-) diff --git a/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst b/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst index 865612c9c..5a2d7ae3f 100644 --- a/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst +++ b/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst @@ -289,10 +289,25 @@ In a planned failover, you can do necessary cleanup work on the previous primary Take down the Current Primary Management Node --------------------------------------------- -xCAT ships a sample script ``/opt/xcat/share/xcat/hamn/deactivate-mn`` to make the machine be a standby management node. Before using this script, you need to review the script carefully and make updates accordingly, here is an example of how to use this script: :: +xCAT ships a sample script ``/opt/xcat/share/xcat/hamn/deactivate-mn`` to make the machine be a standby management node. 
Before using this script, you need to review the script carefully and make updates accordingly. In particular, correct the following variable values in this script: :: + + SHAREDVG=sharedvg # For AIX, shared volume group name + DBDIR=/var/lib/pgsql # database directory + SHAREDFS="/install /etc/xcat /root/.xcat /tftpboot" # Shared file systems + USEDB2=no # if DB2 is being used + USETEAL=no # if TEAL is being used + USEDFM=no # if DFM is being used + USEUNMOUNT=yes # unmount the shared data directories + POWER775=no # Power 775 cluster + CHANGEHOSTNAME=no # change the hostname from virtual ip hostname to the original hostname + xcatdb=postgresql # database in use: postgresql, DB2 or mysql + +Here is an example of how to use this script: :: /opt/xcat/share/xcat/hamn/deactivate-mn -i eth1:2 -v 9.114.47.97 +**Note**: This script is overwritten when xCAT is upgraded. If you have customized it, make sure to back it up before upgrading xCAT. + On the current primary management node: If the management node is still available and running the cluster, perform the following steps to shutdown. @@ -339,11 +354,26 @@ If the management node is still available and running the cluster, perform the f Bring up the New Primary Management Node ---------------------------------------- +xCAT ships a sample script ``/opt/xcat/share/xcat/hamn/activate-mn`` to make the machine the new primary management node. 
Before using this script, you need to review the script carefully and make updates accordingly. In particular, correct the following variable values in this script: :: -Execute script ``/opt/xcat/share/xcat/hamn/activate-mn`` to make the machine be a primary management node: :: + SHAREDVG=sharedvg # For AIX, shared volume group name + DBDIR=/var/lib/pgsql # database directory + SHAREDFS="/install /etc/xcat /root/.xcat" # Shared file systems + USEDFM=no # if DFM is being used + USENTP=no # if NTP is being used + POWER775=no # Power 775 cluster + USETEAL=no # if TEAL is being used + USEMOUNT=yes # if yes, mount the shared file systems (expected to be defined in /etc/fstab) + CHANGEHOSTNAME=yes # set the hostname to the virtual ip address hostname + RESTARTDNS=yes # if yes, will execute "makedns -n" + xcatdb=postgresql # database in use: postgresql, DB2 or mysql + +Here is an example of how to use this script to make the machine the primary management node: :: /opt/xcat/share/xcat/hamn/activate-mn -i eth1:2 -v 9.114.47.97 -m 255.255.255.0 +**Note**: This script is overwritten when xCAT is upgraded. If you have customized it, make sure to back it up before upgrading xCAT. + On the new primary management node: #. 
Configure Virtual IP: :: diff --git a/xCAT-server/share/xcat/hamn/activate-mn b/xCAT-server/share/xcat/hamn/activate-mn index 8c73d14b4..ef072a597 100755 --- a/xCAT-server/share/xcat/hamn/activate-mn +++ b/xCAT-server/share/xcat/hamn/activate-mn @@ -25,18 +25,19 @@ # ----------------------------------------------------------------------------- # 2011-12-08 JDW 1.0 Original # 2013-06-06 xCAT 1.1 Updated version to ship with xCAT - +# 2017-10-17 xCAT 2.13.8 Enhance SHAREDVG=sharedvg # For AIX, shared volumn group name -SHAREDFS="/install /etc/xcat ~/.xcat" # Shared file systems -DB2DIR=/db2database # DB2 only, the db2 database directory +DBDIR=/var/lib/pgsql # database directory +SHAREDFS="/install /etc/xcat /root/.xcat" # Shared file systems USEDFM=no # if DFM is being used USENTP=no # if NTP is being used POWER775=no # Power 775 cluster USETEAL=no # if TEAL is being used -CHANGEHOSTNAME=no # set the hostname to the virtual ip address hostname - - -xcatdb=`XCATBYPASS=1 /opt/xcat/bin/lsxcatd -d | cut -f 2 -d "=" 2>/dev/null` +USEMOUNT=yes # if yes, mount $SHAREDFS and $DBDIR (expected to be defined in /etc/fstab) +CHANGEHOSTNAME=yes # set the hostname to the virtual ip address hostname +RESTARTDNS=yes # if yes, will execute "makedns -n" +exit_code=0 # set to 1 by runcmd when a command still fails after its retries +xcatdb=postgresql # database in use: postgresql, DB2 or mysql usage() { @@ -49,7 +50,31 @@ EOF runcmd() { echo "Running command: $@" - $@ + a=0 + while true + do + if [ $a -eq 5 ]; then + echo "$@ [Failed]" + exit_code=1 + return 1 + elif [ $a -gt 0 ]; then + sleep 3 + echo "Retry $a ...... $@" + fi + $@ + if [ $? 
-eq 0 ]; then + echo "$@ [Passed]" + return 0 + else + a=$((a+1)) + fi + done +} + +clear_env() +{ + NIC=$1 + ifconfig $NIC 0.0.0.0 0.0.0.0 2>/dev/null } ################################################################################ @@ -133,15 +158,48 @@ then runcmd "varyonvg $SHAREDVG" fi -for fs in $SHAREDFS -do - runcmd "mount $fs" -done - -#DB2 only -if [ $xcatdb = "DB2" ] +if [ $USEMOUNT = "yes" ] then - runcmd "mount $DB2DIR" + for fs in $SHAREDFS $DBDIR + do + runcmd "mount $fs" + done +else + echo "Warning: share data $SHAREDFS $DBDIR should be mounted before." +fi + +# On Linux, check that the database configured for xCAT matches $xcatdb +if [ `uname` = "Linux" ]; then + echo "Check DB...." + if [ -s /etc/xcat/cfgloc ] + then + xcatcfg=`cat /etc/xcat/cfgloc|cut -f 1 -d : 2>/dev/null` + else + # SQLite: no cfgloc, so ask lsxcatd which database is in use + xcatcfg=`XCATBYPASS=1 /opt/xcat/bin/lsxcatd -d | cut -f 2 -d "=" 2>/dev/null` + # filter Error message from lsxcatd + echo "$xcatcfg" | grep "Error" && xcatcfg="" + fi + if [ -n "$xcatcfg" ]; then + current_db=$xcatcfg + if [ $xcatcfg = "Pg" ] + then + current_db="postgresql" + fi + echo "Current database is $current_db." + if [ $current_db != "$xcatdb" ] + then + echo "Error: current DB $current_db is not matched with $xcatdb. [Failed]" + clear_env $NIC + exit 1 + else + echo "DB is matched with $xcatdb [Passed]" + fi + else + echo "Error: can not get current DB. [Failed]" + clear_env $NIC + exit 1 + fi fi # Start NFS @@ -167,31 +225,57 @@ then fi ############################################################################## - echo "Starting $xcatdb...." if [ `uname` = "AIX" ] then if [ $xcatdb = "DB2" ] then runcmd "su - xcatdb -c \"db2start\"" - elif [ $xcatdb = "mysql" ] + elif [ $xcatdb = "mysql" ] then runcmd "startsrc -g mysql" fi else # Linux if [ $xcatdb != "SQLite" ] then - runcmd "service $xcatdb start 2>&1 1>/dev/null" + runcmd "service $xcatdb start" + if [ $? -eq 0 ] + then + XCATBYPASS=1 tabdump site >/dev/null + if [ $? 
-ne 0 ] + then + echo "Error: cannot connect $xcatdb [Failed], exit...." + clear_env $NIC + exit 1 + fi + else + echo "Error: service $xcatdb start [Failed], exit...." + clear_env $NIC + exit 1 + fi fi fi - echo "Starting xCAT...." if [ `uname` = "AIX" ] then runcmd "/opt/xcat/sbin/restartxcatd" else # Linux runcmd "service xcatd restart" + if [ $? -ne 0 ] + then + echo "Error: failed to restart xcatd. exiting...." + clear_env $NIC + exit 1 + else + tabdump site >/dev/null + if [ $? -ne 0 ] + then + echo "Error: xcatd restarted but 'tabdump site' failed. exiting...." + clear_env $NIC + exit 1 + fi + fi fi #echo "Start xCAT on the Service Nodes...." @@ -230,6 +314,11 @@ then fi ############################################################################## +# Re-make DNS definitions (only when RESTARTDNS=yes) +if [ $RESTARTDNS = "yes" ] +then + runcmd "makedns -n" +fi # Re-make DHCP definitions echo "Making upto date DHCP configuration file...." @@ -281,5 +370,9 @@ fi echo "" echo "" -echo "This machine is set to active management node successfully, enjoy..." -exit 0 +if [ $exit_code -eq 0 ]; then + echo "This machine is set to active management node successfully, enjoy..." +else + echo "This machine is set to active management node, but some service failed..." 
+fi +exit $exit_code diff --git a/xCAT-server/share/xcat/hamn/deactivate-mn b/xCAT-server/share/xcat/hamn/deactivate-mn index 9a4ab6ddf..a5d380d2f 100755 --- a/xCAT-server/share/xcat/hamn/deactivate-mn +++ b/xCAT-server/share/xcat/hamn/deactivate-mn @@ -24,15 +24,18 @@ # ----------------------------------------------------------------------------- # 2011-12-08 JDW 1.0 Original # 2013-06-06 xCAT 1.1 Updated version to ship with xCAT +# 2017-10-12 xCAT 2.13.8 Enhance SHAREDVG=sharedvg # For AIX, shared volumn group name -SHAREDFS="/install /etc/xcat ~/.xcat" # Shared file systems -DB2DIR=/db2database # DB2 only, the db2 database directory +DBDIR=/var/lib/pgsql # database directory +SHAREDFS="/install /etc/xcat /root/.xcat /tftpboot" # Shared file systems +USEDB2=no # if DB2 is being used USETEAL=no # if TEAL is being used USEDFM=no # if DFM is being used +USEUNMOUNT=yes # if yes, unmount the shared data directories POWER775=no # Power 775 cluster CHANGEHOSTNAME=no # change the hostname from virtual ip hostname to the original hostname - -xcatdb=`XCATBYPASS=1 /opt/xcat/bin/lsxcatd -d | cut -f 2 -d "=" 2>/dev/null` +exit_code=0 # set to 1 by runcmd when a command still fails after its retries +xcatdb=postgresql # database in use: postgresql, DB2 or mysql usage() { @@ -45,7 +48,25 @@ EOF runcmd() { echo "Running command: $@" - $@ + a=0 + while true + do + if [ $a -eq 5 ]; then + echo "$@ [Failed]" + exit_code=1 + return 1 + elif [ $a -gt 0 ]; then + sleep 3 + echo "Retry $a ...... $@" + fi + $@ + if [ $? -eq 0 ]; then + echo "$@ [Passed]" + return 0 + else + a=$((a+1)) + fi + done } ################################################################################ @@ -154,7 +175,7 @@ if [ `uname` = "AIX" ] then stopsrc -s xcatd else # Linux - service xcatd stop + runcmd "service xcatd stop" fi echo "Stopping database $xcatdb...." 
@@ -170,15 +191,16 @@ then else # Linux if [ $xcatdb != "SQLite" ] then - runcmd "service $xcatdb stop 2>&1 1>/dev/null" + runcmd "service $xcatdb stop" fi fi ############################################################################## - -# Kill nicely, wait a little time for processes to clean-up.... -for fs in $SHAREDFS $DB2DIR -do +if [ $USEDB2 = "yes" ] +then + # Kill nicely, wait a little time for processes to clean-up.... + for fs in $SHAREDFS $DBDIR + do echo "Killing processes (nicely) accessing filesystem ${fs}...." if [ `uname` = "AIX" ] then @@ -186,7 +208,8 @@ do else # Linux runcmd "fuser -u -k ${fs}" fi -done + done +fi # AIX only, Linux has the -l flag could hanldle everything if [ `uname` = "AIX" ] @@ -231,14 +254,15 @@ then runcmd "umount $fs" done else - for fs in $SHAREDFS - do - runcmd "umount -l $fs" - done + if [ $USEUNMOUNT = "yes" ] + then + for fs in $SHAREDFS $DBDIR + do + runcmd "umount -l $fs" + done + fi fi - - if [ `uname` = "AIX" ] then varyoffvg sharedvg @@ -257,22 +281,33 @@ then BOOT_HOSTNAME=`grep HOSTNAME /etc/sysconfig/network | cut -f 2 -d "="` fi fi - hostname ${BOOT_HOSTNAME} + runcmd "hostname ${BOOT_HOSTNAME}" fi # Remove virtual ip addresses # Firstly determine the correct IP addresses for each interface -echo "Removing IP Aliases...." +echo "Removing IP Aliase...." if [ `uname` = "AIX" ] then runcmd "ifconfig $NIC delete $VIP" else # Linux - runcmd "ifconfig $NIC 0.0.0.0 0.0.0.0" + ifconfig $NIC 0.0.0.0 0.0.0.0 + ip addr show |grep -qwF "$VIP" + if [ $? -ne 0 ]; then + echo "IP Aliase $NIC $VIP is removed.[Passed]" + else + echo "Fail to remove IP Aliase $NIC $VIP. [Failed]" + exit_code=1 + fi fi echo "" echo "" -echo "This machine is set to standby management node successfully..." - -exit 0 +if [ $exit_code -eq 0 ] +then + echo "This machine is set to standby management node successfully..." +else + echo "This machine is set to standby management node, but some service are failed." +fi +exit $exit_code