From 03bd6f3f24ea9c58eab6410efb784c420a290386 Mon Sep 17 00:00:00 2001 From: Victor Hu Date: Wed, 17 Apr 2019 22:24:25 -0400 Subject: [PATCH] Update the cuda setup script to account for difference in rhels 7.5 and 7.6 and some enhancements --- xCAT/postscripts/cuda_power9_setup | 39 +++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/xCAT/postscripts/cuda_power9_setup b/xCAT/postscripts/cuda_power9_setup index 23a9f8aa7..d3e5b33dc 100755 --- a/xCAT/postscripts/cuda_power9_setup +++ b/xCAT/postscripts/cuda_power9_setup @@ -15,19 +15,50 @@ umask 0022 -mkdir ${IMG_ROOTIMGDIR}/etc/systemd/system/nvidia-persistenced.service.d +echo "==> Override settings in /etc/systemd/system/nvidia-persistenced.service.d..." +mkdir -p ${IMG_ROOTIMGDIR}/etc/systemd/system/nvidia-persistenced.service.d printf '[Service]\nPIDFile=/var/run/nvidia-persistenced/nvidia-persistenced.pid\n' > ${IMG_ROOTIMGDIR}/etc/systemd/system/nvidia-persistenced.service.d/pidfile.conf printf '[Service]\nRestart=always\n' > ${IMG_ROOTIMGDIR}/etc/systemd/system/nvidia-persistenced.service.d/restartalways.conf [ ! -z "${IMG_ROOTIMGDIR}" ] && CHROOTCMD="chroot ${IMG_ROOTIMGDIR}" $CHROOTCMD /bin/bash -c "systemctl enable nvidia-persistenced" -if [ -f ${IMG_ROOTIMGDIR}/lib/udev/rules.d/40-redhat.rules ] -then - sed -i /SUBSYSTEM==\"memory\"/d ${IMG_ROOTIMGDIR}/lib/udev/rules.d/40-redhat.rules +# Disable a udev rule installed by default in some Linux distributions that cause hot-pluggable +# memory to be automatically onlined when it is physically probed. +# +# The overrides for /lib/udev rules should be done in /etc/udev +# +UDEV_REDHAT_SOURCE=${IMG_ROOTIMGDIR}/lib/udev/rules.d/40-redhat.rules +UDEV_REDHAT_TARGET=${IMG_ROOTIMGDIR}/etc/udev/rules.d/40-redhat.rules + +# If the file does not exist in /etc/udev, copy it from /lib/udev +if [ ! -e ${UDEV_REDHAT_TARGET} ]; then + cp -n ${UDEV_REDHAT_SOURCE} ${UDEV_REDHAT_TARGET} fi +# Disable udev memory auto-onlining Rule for cuda10.x +# +# For RHELS 7.5 ALT +# +sed -i "s/^\(SUBSYSTEM==\"memory\".*\)/#\1/" ${UDEV_REDHAT_TARGET} +# +# For RHELS 7.6 ALT +# +if [[ `grep 'Memory hotadd request' ${UDEV_REDHAT_TARGET} 2>&1 >> /dev/null && grep 'LABEL="memory_hotplug_end' ${UDEV_REDHAT_TARGET} 2>&1 >> /dev/null; echo $?` == 0 ]]; then + echo "Detected RHELS 7.6 ALT, modifying ${UDEV_REDHAT_TARGET}..." + # Comment out the memory hotadd request (for reference) + if [[ `grep "## Memory hotadd request" ${UDEV_REDHAT_TARGET} 2>&1 >> /dev/null; echo $?` != 0 ]]; then + # but only run one time, not if it's already commented out. (to handle multiple genimage calls) + sed -i '/Memory hotadd request/,+8 s/^/#/' ${UDEV_REDHAT_TARGET} + fi +fi + +echo "Comparing ${UDEV_REDHAT_SOURCE} and ${UDEV_REDHAT_TARGET}" +diff ${UDEV_REDHAT_SOURCE} ${UDEV_REDHAT_TARGET} + +echo "==> Setting NVIDIA options in /usr/lib/modprobe.d/nvidia.conf..." echo 'options nvidia NVreg_EnableStreamMemOPs=1 NVreg_RegistryDwords="PeerMappingOverride=1"' >${IMG_ROOTIMGDIR}/usr/lib/modprobe.d/nvidia.conf +echo 'blacklist nouveau' >> ${IMG_ROOTIMGDIR}/usr/lib/modprobe.d/nvidia.conf if [ -z "${IMG_ROOTIMGDIR}" ] then