diff --git a/docs/source/advanced/gpu/nvidia/osimage/rhels.rst b/docs/source/advanced/gpu/nvidia/osimage/rhels.rst index 77dee5f3f..a810fddee 100644 --- a/docs/source/advanced/gpu/nvidia/osimage/rhels.rst +++ b/docs/source/advanced/gpu/nvidia/osimage/rhels.rst @@ -189,24 +189,24 @@ xCAT includes a script, ``cuda_power9_setup`` as example, to help user handle th Diskful osimage ^^^^^^^^^^^^^^^ -For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node postbootscrtips list. +For diskful deployment, there is no need to change the osimage definition. Instead, add this postscript to your compute node postscripts list. :: - chdef p9compute -p postbootscripts=cuda_power9_setup + chdef p9compute -p postscripts=cuda_power9_setup -Disless osimage -^^^^^^^^^^^^^^^ +Diskless osimage +^^^^^^^^^^^^^^^^ -For diskless deployment, the script need to add to the postinstall script of the osimage. And it should be run in the chroot environment. Please refer the following commands as an example. +For diskless deployment, the script needs to be added to the postinstall script of the osimage, and it should be run in the chroot environment. Please refer to the following commands as an example. 
:: - mkdir -p /install/custom/netboot - cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall + mkdir -p /install/custom/netboot/rh + cp /opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall /install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall - cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <-EOF + cat >>/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall <<-EOF - cp /install/postscripts/cuda_power9_setup /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup" - chroot /install/netboot/rhels7.5/ppc64le/compute/rootimg" /tmp/cuda_power9_setup + cp /install/postscripts/cuda_power9_setup \$installroot/tmp/cuda_power9_setup + chroot \$installroot /tmp/cuda_power9_setup - rm -f /install/netboot/rhels7.5/ppc64le/compute/rootimg/tmp/cuda_power9_setup + rm -f \$installroot/tmp/cuda_power9_setup EOF - chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall + chdef -t osimage rhels7.5-ppc64le-netboot-cudafull postinstall=/install/custom/netboot/rh/cudafull.rhels7.ppc64le.postinstall diff --git a/xCAT/postscripts/cuda_power9_setup b/xCAT/postscripts/cuda_power9_setup index d2120f465..caf0cddb5 100755 --- a/xCAT/postscripts/cuda_power9_setup +++ b/xCAT/postscripts/cuda_power9_setup @@ -40,3 +40,40 @@ chmod 0644 /usr/lib/systemd/system/nvidia-persistenced.service systemctl enable nvidia-persistenced rm -f /lib/udev/rules.d/40-redhat.rules + +# +# Extra steps for passing parameters to kernel module nvidia +# +mkdir -p /usr/lib/dracut/modules.d/95nvidia +cat >/usr/lib/dracut/modules.d/95nvidia/module-setup.sh <\$initdir/etc/modprobe.d/nvidia.conf +} +EOF + +chmod 0755 /usr/lib/dracut/modules.d/95nvidia/module-setup.sh +echo 'add_dracutmodules+=" nvidia "' >/etc/dracut.conf.d/nvidia_patch.conf + +# Patch for the diskful environment, or in case kernel 
module nvidia needs to be reloaded +echo 'options nvidia NVreg_RegistryDwords="RMNumaOnlining=0x1;RMNvLinkSpeedControl=0x9;PeerMappingOverride=1"' >/etc/modprobe.d/nvidia.conf + +# Redo the initrd image +kernel_version="$(for d in $(ls /lib/modules | sort -V) ; do : ; done && echo $d)" +mkinitrd -v -f "/boot/initramfs-${kernel_version}.img" "${kernel_version}"