mirror of
https://github.com/xcat2/xcat-core.git
synced 2025-06-13 09:50:19 +00:00
Change CUDA 9.2 installation document and example setup script for POWER 9 (#5074)
* Change CUDA installation document for POWER 9
* Update the example script for CUDA setup on POWER 9
* Change CUDA installation document, ask user to use postscripts instead of postbootscripts
* Tweak the nvidia_patch.conf
* Redo initrd image for both diskless and diskful compute nodes
* Fix typo
This commit is contained in:
@ -189,24 +189,24 @@ xCAT includes a script, ``cuda_power9_setup`` as example, to help user handle th
|
||||
Diskful osimage
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node postbootscrtips list.
|
||||
For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node's postscripts list. ::
|
||||
|
||||
chdef p9compute -p postbootscripts=cuda_power9_setup
|
||||
chdef p9compute -p postscripts=cuda_power9_setup
|
||||
|
||||
Disless osimage
|
||||
^^^^^^^^^^^^^^^
|
||||
Diskless osimage
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
For diskless deployment, the script need to add to the postinstall script of the osimage. And it should be run in the chroot environment. Please refer the following commands as an example.
|
||||
For diskless deployment, the script needs to be added to the postinstall script of the osimage, and it should be run in the chroot environment. Please refer to the following commands as an example. ::
|
||||
|
||||
mkdir -p /install/custom/netboot
|
||||
cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall
|
||||
mkdir -p /install/custom/netboot/rh
|
||||
cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall
|
||||
|
||||
cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <-EOF
|
||||
cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <<-EOF
|
||||
|
||||
cp /install/postscripts/cuda_power9_setup /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup"
|
||||
chroot /install/netboot/rhels7.5/ppc64le/compute/rootimg" /tmp/cuda_power9_setup
|
||||
cp /install/postscripts/cuda_power9_setup \$installroot/tmp/cuda_power9_setup
|
||||
chroot \$installroot /tmp/cuda_power9_setup
|
||||
|
||||
rm -f /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup
|
||||
rm -f \$installroot/tmp/cuda_power9_setup
|
||||
EOF
|
||||
|
||||
chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall
|
||||
chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall
|
||||
|
@ -40,3 +40,40 @@ chmod 0644 /usr/lib/systemd/system/nvidia-persistenced.service
|
||||
systemctl enable nvidia-persistenced
|
||||
|
||||
rm -f /lib/udev/rules.d/40-redhat.rules
|
||||
|
||||
#
|
||||
# Extra steps for passing parameters to kernel module nvidia
|
||||
#
|
||||
mkdir -p /usr/lib/dracut/modules.d/95nvidia
|
||||
cat >/usr/lib/dracut/modules.d/95nvidia/module-setup.sh <<EOF
|
||||
#!/bin/bash
|
||||
# module setup file for dracut
|
||||
# nvidia patch described in:
|
||||
|
||||
check() {
|
||||
return 0
|
||||
}
|
||||
|
||||
depends() {
|
||||
return 0
|
||||
}
|
||||
|
||||
installkernel() {
|
||||
return 0
|
||||
}
|
||||
|
||||
install() {
|
||||
[ -d \$initdir/etc/modprobe.d/ ] || mkdir \$initdir/etc/modprobe.d
|
||||
echo 'options nvidia NVreg_RegistryDwords="RMNumaOnlining=0x1;RMNvLinkSpeedControl=0x9;PeerMappingOverride=1"' >\$initdir/etc/modprobe.d/nvidia.conf
|
||||
}
|
||||
EOF
|
||||
|
||||
chmod 0755 /usr/lib/dracut/modules.d/95nvidia/module-setup.sh
|
||||
echo 'add_dracutmodules+=" nvidia "' >/etc/dracut.conf.d/nvidia_patch.conf
|
||||
|
||||
# Patch for the diskful environment, or in case the nvidia kernel module needs to be reloaded
|
||||
echo 'options nvidia NVreg_RegistryDwords="RMNumaOnlining=0x1;RMNvLinkSpeedControl=0x9;PeerMappingOverride=1"' >/etc/modprobe.d/nvidia.conf
|
||||
|
||||
# Redo the initrd image
|
||||
kernel_version="$(for d in $(ls /lib/modules | sort -V) ; do : ; done && echo $d)"
|
||||
mkinitrd -v -f "/boot/initramfs-${kernel_version}.img" "${kernel_version}"
|
||||
|
Reference in New Issue
Block a user