################################################################################
# Create a generic HPC image
#
# This script is an example of how to create an HPC image with xCAT stateless.
# You will probably need to change some of the parameters below. In the future
# these settings should probably live in the xCAT database.
#
# All good sys admins can reproduce their environments. Creating a stateless
# image should be reproducible, and doing it with a script is a good way to
# get there.
#
# Behavior on failure: the script stops with a non-zero exit status if the base
# image cannot be generated or packed. Optional tweaks (chkconfig, perl edits)
# remain best-effort so a missing optional service does not abort the build.
################################################################################

# Referencing an unset variable is an error; protects the rm -rf below.
set -u

# Print a message to stderr and abort the build.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Specify the OS stuff. You'll probably want to change this. If you do, make
# sure there is a matching *.pkglist and *.exlist for the profile.
PROFILE=compute
ARCH=x86_64
OS=centos5.3

# Directory holding everything for this image, and the root filesystem in it.
IMGDIR=/install/netboot/$OS/$ARCH/$PROFILE
IMGROOT=$IMGDIR/rootimg

# Remove the old image if it exists. ${IMGDIR:?} aborts rather than expanding
# to an empty path.
rm -rf -- "${IMGDIR:?}"

# Generate the base image.
# For IBM: tg3 in older blades, bnx2 in newer stuff, and igb in the Nehalem
# based products.
# NOTE(review): the architecture is not passed to genimage, so it presumably
# uses its own default; confirm that matches $ARCH, since IMGROOT depends on it.
/opt/xcat/share/xcat/netboot/centos/genimage \
  -i eth0 -n libphy,tg3,bnx2,igb -o "$OS" -p "$PROFILE" \
  || die "genimage failed for $OS/$ARCH/$PROFILE"

# Everything below writes into the image root; stop if it was not created.
[ -d "$IMGROOT" ] || die "image root $IMGROOT does not exist after genimage"

# Update the yum database in the image: replace the image's repo definitions
# with the host's, minus the xCAT repos.
rm -rf -- "$IMGROOT/etc/yum.repos.d"
yum --installroot="$IMGROOT" clean all
cp -a /etc/yum.repos.d/ "$IMGROOT/etc/"
rm -rf -- "$IMGROOT"/etc/yum.repos.d/xCAT-*

# No selinux!!
echo SELINUX=disabled > "$IMGROOT/etc/sysconfig/selinux"

# Fix respawns: drop every inittab line mentioning 38400, keeping a backup of
# the original file.
cp "$IMGROOT/etc/inittab" "$IMGROOT/etc/inittab.ORIG"
grep -v 38400 "$IMGROOT/etc/inittab.ORIG" > "$IMGROOT/etc/inittab"

# Filesystem setup; you'll want to customize this. Swap and scratch entries
# are provided (commented out), but if you don't have disks they won't help.
# We name our / partition the image name (compute by default) plus a timestamp.
TIME=$(date +%s)
cp "$IMGROOT/etc/fstab" "$IMGROOT/etc/fstab.ORIG"
cat > "$IMGROOT/etc/fstab" <<EOF
proc /proc proc rw 0 0
sysfs /sys sysfs rw 0 0
devpts /dev/pts devpts rw,gid=5,mode=620 0 0
$PROFILE-$TIME / tmpfs rw 0 1
#/dev/sda1 swap swap defaults 0 0
#/dev/sda2 /scratch ext3 defaults 0 0
EOF
# -p: do not complain if the directory is already part of the image.
mkdir -p "$IMGROOT/scratch"

### Get ntp to start on boot
chroot "$IMGROOT" chkconfig --level 345 ntpd on

#### User stuff so that we can do some work ####
# NOTE(review): this copies the management node's password hashes
# (/etc/shadow) into the image; confirm that is acceptable for your site.
for f in shadow group passwd hosts; do
  cp "/etc/$f" "$IMGROOT/etc/"
done

### Scaling fixes #####
cat >> "$IMGROOT/etc/sysctl.conf" <<'EOF'
# increase TCP max buffer size
net.core.rmem_max = 33554432
net.core.wmem_max = 33554432
net.core.rmem_default = 65536
net.core.wmem_default = 65536
# increase Linux autotuning TCP buffer limits
# min, default, and max number of bytes to use
net.ipv4.tcp_rmem = 4096 33554432 33554432
net.ipv4.tcp_wmem = 4096 33554432 33554432
net.ipv4.tcp_mem= 33554432 33554432 33554432
net.ipv4.route.flush=1
net.core.netdev_max_backlog=1500
#GPFS / NFS Tuning
net.ipv4.conf.all.arp_filter = 1
net.ipv4.conf.all.rp_filter = 1
net.ipv4.neigh.default.gc_thresh1 = 1024
net.ipv4.neigh.default.gc_thresh2 = 4096
net.ipv4.neigh.default.gc_thresh3 = 8192
net.ipv4.neigh.default.gc_stale_time = 24
EOF

## Limits: need to make sure users can have unlimited memory
## (ulimit -l unlimited)
cp /etc/security/limits.conf "$IMGROOT/etc/security/"

### Add torque ###
/opt/xcat/share/xcat/netboot/add-on/torque/add_torque \
  "$IMGROOT" mgt /opt/torque / local

# Update the limit on pegged (locked) memory, and if they have an old xCAT
# make sure the config files say torque instead of pbs in their paths.
perl -pi -e 's/ulimit -n 20000/ulimit -n 20000; ulimit -l unlimited/g' \
  "$IMGROOT/etc/init.d/pbs_mom"
perl -pi -e 's/spool\/pbs/spool\/torque/g' "$IMGROOT/etc/init.d/pbs_mom"
for f in epilogue prologue takedownnode; do
  perl -pi -e 's/PBS_HOME=\/var\/spool\/pbs/PBS_HOME=\/var\/spool\/torque/g' \
    "$IMGROOT/var/spool/torque/mom_priv/$f"
done

### Stateless init script does NTP, syslog, etc. ####
cp /etc/localtime "$IMGROOT/etc/"
cp /opt/xcat/share/xcat/netboot/add-on/stateless/stateless \
  "$IMGROOT/etc/init.d/"
# Site-specific edits to the stateless init script: timezone name and the
# location of the ntp drift/keys files.
perl -pi -e 's!Mountain!Mexico/General!g' "$IMGROOT/etc/init.d/stateless"
perl -pi -e 's/driftfile \/etc\/ntp/driftfile \/var\/lib\/ntp/g' \
  "$IMGROOT/etc/init.d/stateless"
perl -pi -e 's/keys \/etc\/ntp/keys \/var\/lib\/ntp/g' \
  "$IMGROOT/etc/init.d/stateless"
rm -rf -- "$IMGROOT/etc/sysconfig/clock"
chroot "$IMGROOT" chkconfig --level 345 stateless on
chroot "$IMGROOT" chkconfig --level 345 rsyslog on

### Make sure dhcp boot protocol is set to none ###
perl -pi -e 's/dhcp/none/g' \
  "$IMGROOT/etc/sysconfig/network-scripts/ifcfg-eth0"

#### Add GPFS stuff ###
# This is an example of a stateless GPFS implementation. Uncomment the lines
# below if you want GPFS. Make sure you have the GPFS RPMs to do this.
#GPFSSRCDIR=/install/GPFS
#yum --installroot=$IMGROOT -y install glibc ksh compat-libstdc++-33 binutils rsh
#rpm -ivh --root=$IMGROOT $GPFSSRCDIR/gpfs.base*rpm
#rpm -ivh --root=$IMGROOT $GPFSSRCDIR/gpfs.gpl*rpm
#rpm -ivh --root=$IMGROOT $GPFSSRCDIR/gpfs.msg*rpm
#rpm -Uivh --root=$IMGROOT $GPFSSRCDIR/updates/gpfs.base*rpm
#rpm -Uivh --root=$IMGROOT $GPFSSRCDIR/updates/gpfs.gpl*rpm
#rpm -Uivh --root=$IMGROOT $GPFSSRCDIR/updates/gpfs.msg*rpm
##
#AUTOGPFSDIR=/opt/xcat/share/xcat/netboot/add-on/autogpfs
#cp $AUTOGPFSDIR/autogpfsc.pl $IMGROOT/usr/sbin
#cp $AUTOGPFSDIR/autogpfsc $IMGROOT/etc/init.d/
#echo "SERVERS=mgt
#PORT=3003
#BLOCK=no" >> $IMGROOT/etc/sysconfig/autogpfsc
#
#chroot $IMGROOT chkconfig --level 345 autogpfsc on
#cp $GPFSSRCDIR/2.6.18-128.el5/* $IMGROOT/usr/lpp/mmfs/bin/
#cp /etc/profile.d/gpfs.sh $IMGROOT/etc/profile.d/
### End GPFS Stuff ####

### IB Stuff ####
# If you have InfiniBand on your machines then you probably have an OFED
# distribution you need to add. Here is an example of how to add it.
#echo "Adding InfiniBand"
#yum --installroot=$IMGROOT -y install bind-utils which rpm tcl tk glibc-devel.i386 pciutils expat libgfortran.x86_64 libgomp.x86_64 tcsh
#
#IBROOT=/install/voltaire/VoltaireOFED-1.4.2_2-k2.6.18-128.el5-x86_64
#mount -o bind /proc $IMGROOT/proc
#mount -o bind /sys $IMGROOT/sys
#mount -o bind $IBROOT $IMGROOT/mnt
#chroot $IMGROOT /mnt/install.sh --without-mpi
#umount $IMGROOT/mnt
#umount $IMGROOT/sys
#umount $IMGROOT/proc
# configure IB IP address
#echo "IP=\`host \$HOSTNAME-ib0 | awk '{ print \$4 }' | head -1\`" >> $IMGROOT/etc/rc.d/rc.local
#echo "ifconfig ib0 \$IP" >> $IMGROOT/etc/rc.d/rc.local
#echo "ifconfig ib0 netmask 255.255.0.0" >> $IMGROOT/etc/rc.d/rc.local
### End IB Stuff ####

# Configure name resolution.
# Change this to what you want it to be (this is in site.tab as the domain
# and nameservers).
cat > "$IMGROOT/etc/resolv.conf" <<'EOF'
search cluster.foo
nameserver 172.20.0.1
EOF

### Uniq-i-fy Torque OS name
# Built from PROFILE and OS so it stays in sync when those are changed
# (yields "compute_centos5.3" with the defaults above).
printf 'opsys %s_%s\n' "$PROFILE" "$OS" \
  >> "$IMGROOT/var/spool/torque/mom_priv/config"

# Pack the image; only report success if packing actually worked.
packimage -p "$PROFILE" -a "$ARCH" -o "$OS" \
  || die "packimage failed for $OS/$ARCH/$PROFILE"
echo "Image $PROFILE-$TIME created"