2
0
mirror of https://github.com/xcat2/xcat-core.git synced 2025-06-13 09:50:19 +00:00

Change CUDA 9.2 installation document and example setup script for POWER 9 (#5074)

* Change cuda installation document for POWER 9

* Update the example script for CUDA setup on POWER 9

* Change CUDA installation document, ask user to use postscripts instead of postbootscripts

* Tweak the nvidia_patch.conf

* Redo initrd image for both diskless and diskfull compute node

* Fix typo
This commit is contained in:
Gᴏɴɢ Jie
2018-04-12 17:19:28 +08:00
committed by Bin Xu
parent dad0167c27
commit dbe5557697
2 changed files with 49 additions and 12 deletions

View File

@ -189,24 +189,24 @@ xCAT includes a script, ``cuda_power9_setup`` as example, to help user handle th
Diskful osimage
^^^^^^^^^^^^^^^
For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node postbootscripts list.
For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node postscripts list. ::
chdef p9compute -p postbootscripts=cuda_power9_setup
chdef p9compute -p postscripts=cuda_power9_setup
Disless osimage
^^^^^^^^^^^^^^^
Diskless osimage
^^^^^^^^^^^^^^^^
For diskless deployment, the script needs to be added to the postinstall script of the osimage, and it should be run in the chroot environment. Please refer to the following commands as an example.
For diskless deployment, the script needs to be added to the postinstall script of the osimage, and it should be run in the chroot environment. Please refer to the following commands as an example. ::
mkdir -p /install/custom/netboot
cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall
mkdir -p /install/custom/netboot/rh
cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall
cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <-EOF
cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <<-EOF
cp /install/postscripts/cuda_power9_setup /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup"
chroot /install/netboot/rhels7.5/ppc64le/compute/rootimg" /tmp/cuda_power9_setup
cp /install/postscripts/cuda_power9_setup \$installroot/tmp/cuda_power9_setup
chroot \$installroot /tmp/cuda_power9_setup
rm -f /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup
rm -f \$installroot/tmp/cuda_power9_setup
EOF
chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall
chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall

View File

@ -40,3 +40,40 @@ chmod 0644 /usr/lib/systemd/system/nvidia-persistenced.service
# Enable the nvidia-persistenced service and remove the 40-redhat.rules udev
# rules file.
systemctl enable nvidia-persistenced
rm -f /lib/udev/rules.d/40-redhat.rules

#
# Extra steps for passing parameters to kernel module nvidia
#
# A dracut module named "nvidia" (directory 95nvidia) is generated below. Its
# install() hook writes the nvidia module options into the initramfs tree so
# they are present when the initramfs is built.
mkdir -p /usr/lib/dracut/modules.d/95nvidia

# The heredoc delimiter is quoted so the body is written out literally:
# $initdir must be expanded by dracut when it sources module-setup.sh, not by
# this script while generating the file.
cat >/usr/lib/dracut/modules.d/95nvidia/module-setup.sh <<'EOF'
#!/bin/bash
# module setup file for dracut
# nvidia patch described in:

check() {
    return 0
}

depends() {
    return 0
}

installkernel() {
    return 0
}

install() {
    # -p also creates missing parent directories and is a no-op when the
    # directory already exists.
    mkdir -p "$initdir/etc/modprobe.d"
    echo 'options nvidia NVreg_RegistryDwords="RMNumaOnlining=0x1;RMNvLinkSpeedControl=0x9;PeerMappingOverride=1"' >"$initdir/etc/modprobe.d/nvidia.conf"
}
EOF
chmod 0755 /usr/lib/dracut/modules.d/95nvidia/module-setup.sh

# Ask dracut to include the module generated above in every image it builds.
echo 'add_dracutmodules+=" nvidia "' >/etc/dracut.conf.d/nvidia_patch.conf
# Patch for the diskful environment, or in case kernel module nvidia needs to
# be reloaded: write the same module options to the installed system.
echo 'options nvidia NVreg_RegistryDwords="RMNumaOnlining=0x1;RMNvLinkSpeedControl=0x9;PeerMappingOverride=1"' >/etc/modprobe.d/nvidia.conf

# Redo the initrd image
#
# The target kernel is the highest version (per `sort -V`) among the
# directories under /lib/modules. The glob avoids parsing `ls` output and
# skips anything that is not a directory.
# NOTE(review): presumably /lib/modules is used instead of `uname -r` because
# this script may run inside a chroot — confirm.
kernel_version="$(
    for module_dir in /lib/modules/*/; do
        [ -d "${module_dir}" ] || continue
        basename "${module_dir}"
    done | sort -V | tail -n 1
)"

# Without a kernel version mkinitrd would be asked to build
# /boot/initramfs-.img for an empty version, so fail with a clear message.
if [ -z "${kernel_version}" ]; then
    echo "cuda_power9_setup: no kernel found under /lib/modules, cannot rebuild initrd" >&2
    exit 1
fi

mkinitrd -v -f "/boot/initramfs-${kernel_version}.img" "${kernel_version}"