useful_scripts/fix_cloud.sh
2021-10-12 20:24:31 +01:00

173 lines
5.4 KiB
Bash
Executable File

#!/bin/bash
set -ax
# Run this script after the cloud has been shut down and rebooted: it brings
# the mysql cluster, vault, ceph-osd and landscape services back up.
#######################################
# Report whether enough units of an application show a given workload status.
# Arguments:
#   $1 - juju application name (may contain hyphens)
#   $2 - workload-status message that must match exactly
#   $3 - optional minimum number of matching units (default: 3)
# Outputs:
#   "1" on stdout when at least $3 units match, otherwise "0"
#######################################
check_unit_status()
{
  local app_name=$1
  local status_check=$2
  local min_units=${3:-3}
  local matching

  # The application name and wanted message are passed with --arg rather than
  # being spliced into the filter, so hyphenated names are looked up correctly
  # and each unit is compared on its own (no substring matching on unit names).
  matching=$(juju status --format json \
    | jq -r --arg app "${app_name}" --arg want "${status_check}" \
      '[(.applications[$app].units // {}) | to_entries[]
        | select(.value["workload-status"].message == $want)] | length')

  # An empty result (juju or jq failed) counts as zero matching units.
  if [[ "${matching:-0}" -ge "${min_units}" ]]; then
    echo 1
  else
    echo 0
  fi
}
#######################################
# Find the leader unit of a juju application.
# Each unit is asked in turn via `juju run --unit <unit> is-leader`; the first
# unit that answers "True" is reported.
# Arguments:
#   $1 - juju application name (may contain hyphens)
# Outputs:
#   The leader unit name on stdout, or an empty line when no unit answered
#   "True".
#######################################
get_lead()
{
  local app_name=$1
  local units unit is_leader
  local unit_lead=""

  # --arg keeps hyphenated application names from being parsed as jq syntax.
  units=$(juju status --format json \
    | jq -r --arg app "${app_name}" \
      '(.applications[$app].units // {}) | to_entries[] | .key')

  # Unit names contain no whitespace, so word-splitting the list is intended.
  for unit in ${units}; do
    is_leader=$(juju run --unit "${unit}" "is-leader")
    if [[ "${is_leader}" == "True" ]]; then
      unit_lead=${unit}
      break
    fi
  done
  echo "${unit_lead}"
}
#######################################
# Pick the mysql unit on which the bootstrap-pxc action should be run.
# Arguments:
#   $1 - newline-separated compact JSON objects of the form
#        {"sub":"<unit>","status":"<workload status message>"}
#   $2 - leader unit, used when every unit reports the same sequence number
# Outputs:
#   The chosen unit name on stdout; nothing when no status message carries
#   "Safe To Bootstrap: 1" or a "Sequence Number".
#######################################
select_bootstrap_unit() {
  local status_json=$1
  local lead_unit=$2
  local safe_unit seq_pairs distinct_seqs

  # A unit that reports "Safe To Bootstrap: 1" always wins.
  safe_unit=$(jq -r 'select((.status // "") | contains("Safe To Bootstrap: 1")) | .sub' \
    <<<"${status_json}" | head -n 1)
  if [[ -n "${safe_unit}" ]]; then
    printf '%s\n' "${safe_unit}"
    return 0
  fi

  # Build "<sequence number> <unit>" pairs from messages such as
  # "MySQL is down. Sequence Number: 102921. Safe To Bootstrap: 0".
  seq_pairs=$(jq -r 'select((.status // "") | contains("Sequence Number"))
    | (.status | split(".")[1] | split(": ")[1]) + " " + .sub' \
    <<<"${status_json}")
  [[ -n "${seq_pairs}" ]] || return 0

  distinct_seqs=$(awk '{print $1}' <<<"${seq_pairs}" | sort -nu | wc -l)
  if (( distinct_seqs == 1 )); then
    # Same sequence number all round: bootstrap from the leader.
    printf '%s\n' "${lead_unit}"
  else
    # Different sequence numbers: take exactly one unit holding the
    # numerically highest one (ascending numeric sort, last line).
    sort -k1,1n <<<"${seq_pairs}" | tail -n 1 | awk '{print $2}'
  fi
}

# Wait for the juju model to settle before inspecting unit status.
juju-wait -v
mysql_status=$(juju status --format json \
  | jq -rc '.applications.mysql.units | to_entries[] | {sub:.key,status:.value["workload-status"].message}')
# Example content of mysql_status:
#{"sub":"mysql/0","status":"MySQL is down. Sequence Number: 102921. Safe To Bootstrap: 1"}
#{"sub":"mysql/1","status":"MySQL is down. Sequence Number: 102921. Safe To Bootstrap: 0"}
#{"sub":"mysql/2","status":"MySQL is down. Sequence Number: 102921. Safe To Bootstrap: 0"}
mysql_units=$(jq -r '.sub' <<<"${mysql_status}")
mysql_lead=$(get_lead mysql)
bootstrap_unit=$(select_bootstrap_unit "${mysql_status}" "${mysql_lead}")

if [[ -n "${bootstrap_unit}" ]]; then
  juju run-action --wait "${bootstrap_unit}" bootstrap-pxc
  juju run --application mysql "hooks/update-status"
  # Poll until check_unit_status reports enough units in the expected state.
  until [[ $(check_unit_status mysql "Unit waiting for cluster bootstrap") -eq 1 ]]; do
    sleep 10
  done

  # notify-bootstrapped is run on a unit other than the one that was
  # bootstrapped: the leader, unless the leader itself was bootstrapped, in
  # which case the first non-leader unit is used.
  if [[ "${bootstrap_unit}" == "${mysql_lead}" ]]; then
    for unit in ${mysql_units}; do
      if [[ "${unit}" != "${mysql_lead}" ]]; then
        juju run-action --wait "${unit}" notify-bootstrapped
        ran_bootstrap="true"
        break
      fi
    done
  else
    juju run-action --wait "${mysql_lead}" notify-bootstrapped
    ran_bootstrap="true"
  fi

  juju run -a mysql "hooks/update-status"
  until [[ $(check_unit_status mysql "Unit is ready") -eq 1 ]]; do
    sleep 10
  done

  # This is so that nagios doesn't report that the mysql daemon is down
  # although the process is running. juju will then automatically start
  # the mysqld process
  juju run --timeout 30s --unit "${bootstrap_unit}" -- sudo reboot
fi
# Restart the API/engine services on every unit of these applications, then
# trigger their update-status hooks.
juju run -a nova-cloud-controller -- sudo systemctl restart nova-api-os-compute nova-conductor nova-consoleauth
juju run -a heat -- sudo systemctl restart heat-engine
juju run -a vault -- sudo systemctl restart vault
juju run -a nova-cloud-controller "hooks/update-status"
juju run -a heat "hooks/update-status"
# cleanup all crm resources:
# for every application whose charm-name is "hacluster", run on its leader unit
# a pipeline that lists the crm resources (crm_resource -l), strips everything
# from the first ":" onwards, de-duplicates, and calls "crm resource cleanup"
# on each remaining name.
juju status --format json | jq ".applications | to_entries[] | select(.value[\"charm-name\"] == \"hacluster\") | .key" | sed s/\"//g | xargs -i juju run --unit "{}"/leader -- 'sudo crm_resource -l | sed s/:.*//g | uniq | xargs -i sudo crm resource cleanup \"\{\}\"'
# vault.sh is invoked by relative path, so it must only run when the cd
# succeeded; otherwise report the problem and carry on with the other steps.
if cd ~/stsstack-bundles/openstack/arif/; then
  ./vault.sh
else
  echo "ERROR: cannot cd to ~/stsstack-bundles/openstack/arif/ - skipping vault.sh" >&2
fi
juju run -a vault "hooks/update-status"
# Wait 10 seconds, and ensure that vault is unsealed
echo "Sleeping 10 seconds to wait for vault to finalise unseal"
sleep 10
# Restart the ceph-volume@lvm-<osd id>-<osd fsid> systemd unit for every entry
# that "ceph-volume lvm list" reports on each ceph-osd unit, then trigger the
# ceph-osd update-status hook.
osd_units_filter='. | .applications["ceph-osd"].units | to_entries[] | .key'
lvm_list_filter='. | to_entries[] | {id:.key,key:.value[].tags["ceph.osd_fsid"]}'

ceph_osds=$(juju status ceph-osd --format json | jq -rc "${osd_units_filter}")
for ceph_osd in ${ceph_osds}; do
  osds=$(juju ssh "${ceph_osd}" -- sudo ceph-volume lvm list --format json | jq -rc "${lvm_list_filter}")
  # Each word is one compact JSON object: {"id":"<osd id>","key":"<osd fsid>"}
  for osd in ${osds}; do
    osd_id=$(jq .id <<<"${osd}" | tr -d '"')
    uuid=$(jq .key <<<"${osd}" | tr -d '"')
    juju ssh "${ceph_osd}" -- sudo systemctl restart "ceph-volume@lvm-${osd_id}-${uuid}"
  done
done
juju run -a ceph-osd "hooks/update-status"
# Restart all landscape-* services on every landscape-server unit by copying a
# small helper script to each unit and running it there, then restart
# systemd-resolved on every machine in the model.
lds_servers=$(juju status landscape-server --format json \
  | jq -rc '. | .applications["landscape-server"].units | to_entries[] | .key')

# Build the helper in a private temporary directory instead of a predictable
# path under /tmp, and remove it once all restarts have finished.
if restart_dir=$(mktemp -d); then
  restart_script="${restart_dir}/restart-landscape.sh"
  # The glob is quoted so that systemctl does the pattern matching itself and
  # the remote shell cannot expand it against files in its working directory.
  printf '%s\n' '#!/bin/bash' "sudo systemctl restart 'landscape-*'" > "${restart_script}"

  for lds_server in ${lds_servers}; do
    juju scp "${restart_script}" "${lds_server}:restart-landscape.sh"
    juju ssh "${lds_server}" chmod +x restart-landscape.sh
    # The restarts themselves run in parallel across units.
    juju ssh "${lds_server}" sudo ./restart-landscape.sh &
  done
  wait
  rm -rf -- "${restart_dir:?}"
else
  echo "ERROR: mktemp failed - skipping landscape restart" >&2
fi

juju run --all -- sudo systemctl restart systemd-resolved