slurm-lxd/create_cluster.sh

87 lines
2.0 KiB
Bash
Raw Permalink Normal View History

2023-08-08 20:22:09 +00:00
#!/bin/bash
# Bootstrap for the cluster build.
# allexport: every variable assigned below is exported to child processes.
# xtrace:    every command is echoed to stderr before it runs.
set -o allexport -o xtrace

# All LXD objects created by this script live in one dedicated project.
project_name='hpc'
project="--project ${project_name}"

# Command prefixes used by the rest of the script. They are expanded
# unquoted on purpose so that they split into "lxc --project hpc [exec]".
lxc="lxc ${project}"
lxc_exec="${lxc} exec"

lxc project create "${project_name}"

# Feed lxd_profile.yaml (read from the current working directory) to
# "profile edit" for the project's default profile.
${lxc} profile edit default < lxd_profile.yaml
2023-09-05 16:08:56 +00:00
# Launchpad PPA handed to add-apt-repository on the management node before
# slurm-wlm is installed.
ppa='ppa:ubuntu-hpc/slurm-wlm-23.02'
2023-08-08 20:22:09 +00:00
# --- Management node ---------------------------------------------------------
# Launches the controller container, installs slurm-wlm from ${ppa}, installs
# the munge key and slurm.conf, then starts and enables slurmctld and slurmd
# on it.
# Reads ${lxc}, ${lxc_exec} and ${ppa} set earlier in this script; munge.key
# and slurm.conf are read from the current working directory.
# ${lxc} / ${lxc_exec} are left unquoted on purpose: they hold a command plus
# its arguments and must be word-split.
mgmt_node="slurm-master"
${lxc} launch ubuntu:jammy "${mgmt_node}"

# Wait until the instance has finished booting (cloud-init done, network up)
# instead of sleeping for a fixed time and hoping apt can reach the network.
${lxc_exec} "${mgmt_node}" -- cloud-init status --wait

${lxc_exec} "${mgmt_node}" -- add-apt-repository -y "${ppa}"
# apt-get rather than apt: apt's command-line interface is not meant for scripts.
${lxc_exec} "${mgmt_node}" -- apt-get -y install slurm-wlm

# Replace the key in /etc/munge/ with munge.key, then restart munge so it
# picks the new key up.
${lxc} file push munge.key "${mgmt_node}/etc/munge/"
${lxc_exec} "${mgmt_node}" -- systemctl restart munge
${lxc_exec} "${mgmt_node}" -- systemctl enable munge

${lxc} file push slurm.conf "${mgmt_node}/etc/slurm/"

# Spool directory for slurmctld, owned by the slurm user.
# -p keeps this step from failing when the directory already exists.
${lxc_exec} "${mgmt_node}" -- mkdir -p /var/spool/slurmctld
${lxc_exec} "${mgmt_node}" -- chown slurm:slurm /var/spool/slurmctld

${lxc_exec} "${mgmt_node}" -- systemctl start slurmctld
${lxc_exec} "${mgmt_node}" -- systemctl enable slurmctld
# The management node also runs a slurmd of its own.
${lxc_exec} "${mgmt_node}" -- systemctl start slurmd
${lxc_exec} "${mgmt_node}" -- systemctl enable slurmd
# --- Compute nodes -----------------------------------------------------------
# Launches ${nodes} compute containers and installs the munge key on each.
# Reads ${lxc} and ${lxc_exec} set earlier in this script; munge.key is read
# from the current working directory.
nodes=5

# Prints the container name for compute node number $1, zero-padded to two
# digits (1 -> slurm-comp01, 12 -> slurm-comp12) so that names stay uniform
# once there are ten or more nodes.
compute_node_name() {
  printf 'slurm-comp%02d' "$1"
}

for ((i = 1; i <= nodes; i++)); do
  node=$(compute_node_name "${i}")
  ${lxc} launch ubuntu:jammy "${node}"

  # Wait until the instance has finished booting (cloud-init done) instead of
  # sleeping for a fixed time.
  ${lxc_exec} "${node}" -- cloud-init status --wait

  # NOTE(review): this loop installs no packages on the compute nodes, so
  # /etc/munge/ and the munge unit must be provided some other way
  # (presumably by the default profile) - confirm; otherwise the push and the
  # restarts below will fail.
  ${lxc} file push munge.key "${node}/etc/munge/"
  ${lxc_exec} "${node}" -- systemctl restart munge
  ${lxc_exec} "${node}" -- systemctl enable munge
done
# --- /etc/hosts distribution -------------------------------------------------
# Builds one hosts file listing every container whose name contains "slurm",
# pushes it to each of those containers and restarts slurmd there, then
# restarts slurmctld on ${mgmt_node}.
# Reads ${lxc}, ${lxc_exec} and ${mgmt_node} set earlier in this script;
# requires jq on the host.

# Reads "name<TAB>ipv4" records on stdin and prints "ipv4 name" hosts lines on
# stdout. A record without an address is reported on stderr and skipped, so
# that no bogus entry ends up in the hosts file.
hosts_entries() {
  local name ipv4
  while IFS=$'\t' read -r name ipv4; do
    [[ -n "${name}" ]] || continue
    if [[ -z "${ipv4}" ]]; then
      printf 'warning: %s has no IPv4 address on eth0; not added to hosts file\n' \
        "${name}" >&2
      continue
    fi
    printf '%s %s\n' "${ipv4}" "${name}"
  done
}

tmp_hosts=$(mktemp) || exit 1
cat > "${tmp_hosts}" << 'EOF'
127.0.0.1 localhost
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
ff02::3 ip6-allhosts
EOF

# One "name<TAB>ipv4" record per matching container.
# - Only family == "inet" addresses are considered, so an IPv6 address that
#   happens to be listed first is never written out as the IPv4 address.
# - "[]?" keeps jq from aborting on an instance without network state.
# - The address field is empty when eth0 has no IPv4 address.
# ${lxc} is left unquoted on purpose: it holds a command plus its arguments.
mapfile -t node_records < <(
  ${lxc} list --format json \
    | jq -r '.[]
        | select(.name | contains("slurm"))
        | [ .name,
            ( [ .state.network.eth0.addresses[]?
                | select(.family == "inet")
                | .address ][0] // "" ) ]
        | @tsv'
)

printf '%s\n' "${node_records[@]}" | hosts_entries >> "${tmp_hosts}"

for record in "${node_records[@]}"; do
  # The container name is everything before the first tab.
  name=${record%%$'\t'*}
  ${lxc} file push "${tmp_hosts}" "${name}/etc/hosts"
  ${lxc_exec} "${name}" -- sudo systemctl restart slurmd
done

${lxc_exec} "${mgmt_node}" -- sudo systemctl restart slurmctld

# The hosts file has been copied into every container; drop the local copy.
rm -f -- "${tmp_hosts}"