mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-24 16:05:41 +00:00 
			
		
		
		
	Merge tag '2.14.3' into lenovobuild2.14
xCAT 2.14.3 release
This commit is contained in:
		| @@ -1,12 +1,12 @@ | ||||
| os: linux | ||||
| dist: trusty | ||||
| sudo: required | ||||
| before_install:  | ||||
| before_install: | ||||
| - sudo apt-get install -y git reprepro devscripts debhelper libsoap-lite-perl libdbi-perl quilt openssh-server dpkg looptools genometools software-properties-common | ||||
| - perl -v | ||||
| - echo "yes" | sudo cpan -f -i Capture::Tiny | ||||
|  | ||||
| script:  | ||||
| script: | ||||
| - echo $TRAVIS_BUILD_ID | ||||
| - echo $TRAVIS_EVENT_TYPE | ||||
| - echo $TRAVIS_BUILD_NUMBER | ||||
|   | ||||
							
								
								
									
										18
									
								
								README.rst
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								README.rst
									
									
									
									
									
								
							| @@ -6,23 +6,11 @@ xCAT is a toolkit for the deployment and administration of clusters. | ||||
| Documentation | ||||
| ------------- | ||||
|  | ||||
| Latest xCAT documentation is available at: http://xcat-docs.readthedocs.io/en/latest/ | ||||
| `The stable documentation of xCAT <https://xcat-docs.readthedocs.io/en/stable/>`_ | ||||
|  | ||||
| `document for xCAT 2.14.1 <http://xcat-docs.readthedocs.io/en/2.14.1/>`_ | ||||
| `The latest documentation of xCAT <http://xcat-docs.readthedocs.io/en/latest/index.html>`_ | ||||
|  | ||||
| `document for xCAT 2.14 <http://xcat-docs.readthedocs.io/en/2.14.0/>`_ | ||||
|  | ||||
| `document for xCAT 2.13.11 <http://xcat-docs.readthedocs.io/en/2.13.11/>`_ | ||||
|  | ||||
| `document for xCAT 2.13.10 <http://xcat-docs.readthedocs.io/en/2.13.10/>`_ | ||||
|  | ||||
| `document for xCAT 2.13.9 <http://xcat-docs.readthedocs.io/en/2.13.9/>`_ | ||||
|  | ||||
| `document for xCAT 2.13 <http://xcat-docs.readthedocs.io/en/2.13.0/>`_ | ||||
|  | ||||
| `document for xCAT 2.12 <http://xcat-docs.readthedocs.io/en/2.12/>`_ | ||||
|  | ||||
| `document for xCAT 2.11 <http://xcat-docs.readthedocs.io/en/2.11/>`_ | ||||
| `The documentation of each xCAT release <http://readthedocs.org/projects/xcat-docs/versions/>`_ | ||||
|  | ||||
|  | ||||
| Open Source License | ||||
|   | ||||
							
								
								
									
										117
									
								
								build-ubunturepo
									
									
									
									
									
								
							
							
						
						
									
										117
									
								
								build-ubunturepo
									
									
									
									
									
								
							| @@ -8,7 +8,7 @@ | ||||
| # Getting Started: | ||||
| #  - Clone the xcat-core git repository under a directory named "xcat-core/src" | ||||
| #  - make sure reprepro is installed on the build machine | ||||
| #  - Run this script from the local git repository you just created.   | ||||
| #  - Run this script from the local git repository you just created. | ||||
| #       ./build-ubunturepo -c BUILDALL=1 | ||||
|  | ||||
| # Usage:  attr=value attr=value ... ./build-ubunturepo { -c | -d } | ||||
| @@ -22,10 +22,12 @@ | ||||
| #                           When you are ready to release this build, use PROMOTE=1 without PREGA | ||||
| #               BUILDALL=1 - build all rpms, whether they changed or not.  Should be used for snap builds that are in | ||||
| #                            prep for a release. | ||||
| #               GPGSIGN=0 -  Do not sign the repo in the end of the build. The repo will be signed by default  | ||||
| #                             | ||||
| #               GPGSIGN=0 -  Do not sign the repo at the end of the build. The repo will be signed by default | ||||
| # | ||||
| #               LOG=<filename> - provide a LOG file into which some of the output is redirected | ||||
| # | ||||
| #               DEST=<directory> - provide a directory to contain the build result | ||||
| # | ||||
| # For the dependency packages 1. All the xcat dependency deb packages should be uploaded to | ||||
| #                                "pokgsa/projects/x/xcat/build/ubuntu/xcat-dep/debs/" on GSA | ||||
| #                             2. run ./build-ubunturepo -d | ||||
| @@ -47,7 +49,7 @@ fi | ||||
| . /etc/lsb-release | ||||
|  | ||||
| # Check the necessary packages before starting the build | ||||
| declare -a packages=( "reprepro" "devscripts" "debhelper" "libsoap-lite-perl" "libdbi-perl" "quilt" ) | ||||
| declare -a packages=( "reprepro" "devscripts" "debhelper" "libsoap-lite-perl" "libdbi-perl" "quilt" "git") | ||||
|  | ||||
| for package in ${packages[@]}; do | ||||
|     RC=`dpkg -l | grep $package >> /dev/null 2>&1; echo $?` | ||||
| @@ -59,7 +61,7 @@ done | ||||
|  | ||||
| # Process cmd line variable assignments, assigning each attr=val pair to a variable of same name | ||||
| for i in $*; do | ||||
|         echo $i | grep '=' | ||||
|         echo $i | grep '=' -q | ||||
|         if [ $? != 0 ];then | ||||
|             continue | ||||
|         fi | ||||
| @@ -102,24 +104,38 @@ if [ "$c_flag" -a "$d_flag" ];then | ||||
|     exit 2 | ||||
| fi | ||||
|  | ||||
| if [ -z "$BUILDALL" ]; then | ||||
|     BUILDALL=1 | ||||
| fi | ||||
|  | ||||
| # Find where this script is located to set some build variables | ||||
| old_pwd=`pwd` | ||||
| cd `dirname $0` | ||||
| curdir=`pwd` | ||||
|  | ||||
| #define the dep source code path, core build target path and dep build target path | ||||
| local_core_repo_path="$curdir/../../xcat-core" | ||||
| local_dep_repo_path="$curdir/../../xcat-dep/xcat-dep" | ||||
|  | ||||
| # Use flock to only one person build at the same time | ||||
| # Get a lock, so can not do 2 builds at once | ||||
| exec 8>/var/lock/xcatbld.lock | ||||
| if ! flock -n 8; then | ||||
|     echo "Can't get lock /var/lock/xcatbld.lock.  Someone else must be doing a build right now.  Exiting...." | ||||
|     echo "ERROR: Can't get lock /var/lock/xcatbld.lock.  Someone else must be doing a build right now.  Exiting...." | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # for the git case, query the current branch and set REL (changing master to devel if necessary) | ||||
| function setbranch { | ||||
|     # Get the current branch name | ||||
|     branch=`git rev-parse --abbrev-ref HEAD` | ||||
|     if [ "$branch" = "master" ]; then | ||||
|         REL="devel" | ||||
|     elif [ "$branch" = "HEAD" ]; then | ||||
|         # Special handling when in a 'detached HEAD' state | ||||
|         branch=`git describe --abbrev=0 HEAD` | ||||
|         [[ -n "$branch" ]] && REL=`echo $branch|cut -d. -f 1,2` | ||||
|     else | ||||
|         REL=$branch | ||||
|     fi | ||||
| } | ||||
|  | ||||
| export HOME=/root | ||||
|  | ||||
| WGET_CMD="wget" | ||||
| @@ -142,31 +158,31 @@ else | ||||
|     done | ||||
| fi | ||||
|  | ||||
| REL=xcat-core | ||||
| if [ "$c_flag" ] | ||||
| then | ||||
|     #  | ||||
|     # The format of the directory for Ubuntu builds needs to be "xcat-core/src/xcat-core" so  | ||||
|     # that the output build files are created under "xcat-core".  | ||||
|     # TODO: This should be fixed in the future.... | ||||
|     # | ||||
|     if [ -z "$REL" ]; then | ||||
|         t=${curdir%/src/xcat-core} | ||||
|         REL=`basename $t` | ||||
|     setbranch | ||||
|     package_dir_name=debs$REL | ||||
|  | ||||
|     #define the dep source code path, core build target path and dep build target path | ||||
|     if [ -z "$DEST" ]; then | ||||
|         local_core_repo_path="$curdir/../../xcat-core" | ||||
|         PKGDIR="../../$package_dir_name" | ||||
|     else | ||||
|         local_core_repo_path="$DEST/$package_dir_name/xcat-core" | ||||
|         PKGDIR="$DEST/$package_dir_name/$package_dir_name" | ||||
|     fi | ||||
|     if [ "$REL" != "xcat-core" ]; then | ||||
|         echo "ERROR: REL='$REL'needs to be 'xcat-core'.  Ensure the path is 'xcat-core/src/xcat-core'" | ||||
|         exit 1 | ||||
|     if [ ! -d "$PKGDIR" ];then | ||||
|         mkdir -p "$PKGDIR" | ||||
|     fi | ||||
|  | ||||
|     echo "#############################################################" | ||||
|     echo "Building xcat-core on branch ($REL) to $local_core_repo_path" | ||||
|     echo "#############################################################" | ||||
|     if [ "$PROMOTE" != 1 ]; then | ||||
|         code_change=0 | ||||
|         update_log='' | ||||
|         # get the version | ||||
|         git_flag=1 | ||||
|         REL=`git rev-parse --abbrev-ref HEAD` | ||||
|         if [ "$REL" = "master" ]; then | ||||
|             REL="devel" | ||||
|         fi | ||||
|  | ||||
|         if [ -z "$GITUP" ];then | ||||
|             update_log=../coregitup | ||||
|             echo "git pull > $update_log" | ||||
| @@ -186,12 +202,8 @@ then | ||||
|         commit_id_long=`git rev-parse HEAD` | ||||
|         commit_id="${commit_id_long:0:7}" | ||||
|  | ||||
|         package_dir_name=debs$REL | ||||
|         #TODO: define the core path and tarball name | ||||
|         tarball_name="core-debs-snap.tar.bz2" | ||||
|  | ||||
|         if [ $code_change == 0 -a "$UP" != 1 -a "$BUILDALL" != 1 ]; then | ||||
|             echo "Nothing new detected" | ||||
|             echo "Nothing new detected.  Exiting...." | ||||
|             exit 0 | ||||
|         fi | ||||
|  | ||||
| @@ -205,9 +217,6 @@ then | ||||
|         xcat_release="snap$(date '+%Y%m%d%H%M')" | ||||
|         pkg_version="${ver}-${xcat_release}" | ||||
|  | ||||
|         if [ ! -d ../../$package_dir_name ];then | ||||
|             mkdir -p "../../$package_dir_name" | ||||
|         fi | ||||
|         packages="xCAT-client xCAT-genesis-scripts perl-xCAT xCAT-server xCAT xCATsn xCAT-test xCAT-buildkit xCAT-vlan xCAT-confluent xCAT-probe" | ||||
|         target_archs=(amd64 ppc64el) | ||||
|         for file in $packages | ||||
| @@ -221,7 +230,7 @@ then | ||||
|             for target_arch in $target_archs | ||||
|             do | ||||
|                 if grep -q $file $update_log || [ "$BUILDALL" == 1 -o "$file" = "perl-xCAT" ]; then | ||||
|                     rm -f ../../$package_dir_name/${file_low}_*.$target_arch.deb | ||||
|                     rm -f $PKGDIR/${file_low}_*.$target_arch.deb | ||||
|                     cd $file | ||||
|                     CURDIR=$(pwd) | ||||
|                     dch -v $pkg_version -b -c debian/changelog $build_string | ||||
| @@ -278,21 +287,12 @@ then | ||||
|                     cd - | ||||
|                     find $file -maxdepth 3 -type d -name "${file_low}*" | grep debian | xargs rm -rf | ||||
|                     find $file -maxdepth 3 -type f -name "files" | grep debian | xargs rm -rf | ||||
|                     mv ${file_low}* ../../$package_dir_name/ | ||||
|                     mv ${file_low}* $PKGDIR/ | ||||
|                 fi | ||||
|             done | ||||
|         done | ||||
|  | ||||
|         find ../../$package_dir_name/* ! -name *.deb | xargs rm -f | ||||
|     else | ||||
|         if [ "$REL" = "xcat-core" ];then | ||||
|             git_flag=1 | ||||
|             REL=`git rev-parse --abbrev-ref HEAD` | ||||
|             if [ "$REL" = "master" ]; then | ||||
|                 REL="devel" | ||||
|             fi | ||||
|         fi | ||||
|         package_dir_name=debs$REL | ||||
|         find $PKGDIR/* ! -name *.deb | xargs rm -f | ||||
|     fi | ||||
|  | ||||
|     if [ "$PROMOTE" = 1 ]; then | ||||
| @@ -332,9 +332,9 @@ Description: Repository automatically genereted conf | ||||
| __EOF__ | ||||
|  | ||||
|        if [ "$GPGSIGN" = "0" ];then | ||||
|            echo "GPGSIGN=$GPGSIGN specified, the repo will not be signed" | ||||
|            #echo "GPGSIGN=$GPGSIGN specified, the repo will not be signed" | ||||
|            echo "" >> conf/distributions | ||||
|        else  | ||||
|        else | ||||
|            echo "SignWith: 5619700D" >> conf/distributions | ||||
|            echo "" >> conf/distributions | ||||
|        fi | ||||
| @@ -375,7 +375,7 @@ __EOF__ | ||||
|     chmod 775 mklocalrepo.sh | ||||
|  | ||||
|     # | ||||
|     # Add a buildinfo file into the tar.bz2 file to track information about the build  | ||||
|     # Add a buildinfo file into the tar.bz2 file to track information about the build | ||||
|     # | ||||
|     BUILDINFO=$local_core_repo_path/buildinfo | ||||
|     echo "VERSION=$ver" > $BUILDINFO | ||||
| @@ -396,10 +396,18 @@ __EOF__ | ||||
|     chmod -R g+w xcat-core | ||||
|  | ||||
|     #build the tar ball | ||||
|     echo "Creating `pwd`/$tar_name ..." | ||||
|     tar -hjcf $tar_name xcat-core | ||||
|     chgrp root $tar_name | ||||
|     chmod g+w $tar_name | ||||
|  | ||||
|     if [ -n "$DEST" ]; then | ||||
|         ln -sf $(basename `pwd`)/$tar_name ../$tar_name | ||||
|         if [ $? != 0 ]; then | ||||
|             echo "ERROR: Failed to make symbol link $DEST/$tar_name" | ||||
|         fi | ||||
|     fi | ||||
|  | ||||
|     if [ ! -e core-snap ]; then | ||||
|         ln -s xcat-core core-snap | ||||
|     fi | ||||
| @@ -422,6 +430,13 @@ then | ||||
|        exit 1; | ||||
|     fi | ||||
|  | ||||
|     #define the dep source code path, core build target path and dep build target path | ||||
|     if [ -z "$DEST" ]; then | ||||
|         local_dep_repo_path="$curdir/../../xcat-dep/xcat-dep" | ||||
|     else | ||||
|         local_dep_repo_path="$DEST/xcat-dep/xcat-dep" | ||||
|     fi | ||||
|  | ||||
|     # Sync from the GSA master copy of the dep rpms | ||||
|     echo "Creating directory $local_dep_repo_path" | ||||
|     mkdir -p $local_dep_repo_path/ | ||||
| @@ -526,7 +541,7 @@ __EOF__ | ||||
|     APT_DIR="${FRS}/xcat" | ||||
|     APT_REPO_DIR="${APT_DIR}/repos/apt/devel" | ||||
|  | ||||
|     # Decide whether to upload the xcat-dep package or NOT (default is to NOT upload xcat-dep  | ||||
|     # Decide whether to upload the xcat-dep package or NOT (default is to NOT upload xcat-dep) | ||||
|     if [ "$UP" != "1" ]; then | ||||
|         echo "Upload not specified, Done! (rerun with UP=1, to upload)" | ||||
|         cd $old_pwd | ||||
|   | ||||
							
								
								
									
										86
									
								
								buildcore.sh
									
									
									
									
									
								
							
							
						
						
									
										86
									
								
								buildcore.sh
									
									
									
									
									
								
							| @@ -29,8 +29,8 @@ | ||||
| #        EMBED=<embedded-environment> - the environment for which a minimal version of xcat should be built, e.g. zvm or flex | ||||
| #        VERBOSE=1 - to see lots of verbose output | ||||
| #        LOG=<filename> - provide a LOG file into which some of the output is redirected | ||||
| #        RPMSIGN=0 or RPMSIGN=1 - Sign the RPMs using the keys on GSA, the default is to sign the rpms without RPMSIGN specified | ||||
|  | ||||
| #        GPGSIGN/RPMSIGN=0 or GPGSIGN/RPMSIGN=1 - Whether to sign the RPMs using the keys on GSA. GPGSIGN takes precedence over RPMSIGN; the default, when neither is specified, is to not sign the rpms | ||||
| #        DEST=<directory> - provide a directory to contain the build result | ||||
| # | ||||
| # The following environment variables can be modified if you need | ||||
| # | ||||
| @@ -51,24 +51,10 @@ YUMREPOURL="http://${SERVER}/${FILES_PATH}/xcat/repos/yum" | ||||
| if [ "$1" = "-h"  ] || [ "$1" = "-help"  ] || [ "$1" = "--help"  ]; then | ||||
|     echo "Usage:" | ||||
|     echo "      ./buildcore.sh [-h | --help]" | ||||
|     echo "      ./buildcore.sh [UP=1] [RPMSIGN=1] [EMBED=<embedded-environment>] [COMMITID=<id>] [GITPULL=0]" | ||||
|     echo "      ./buildcore.sh [UP=1] [GPGSIGN=1] [EMBED=<embedded-environment>] [COMMITID=<id>] [GITPULL=0]" | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # For users to build from source code, simply run ./buildcore.sh | ||||
| #  1. Do not sign RPM by default | ||||
| #  2. Build all packages by default | ||||
| #  3. Do not upload to sourcefore by default | ||||
| if [ -z "$RPMSIGN" ]; then | ||||
|     RPMSIGN=0 | ||||
| fi | ||||
| if [ -z "$BUILDALL" ]; then | ||||
|     BUILDALL=1 | ||||
| fi | ||||
| if [ -z "$UP" ]; then | ||||
|     UP=0 | ||||
| fi | ||||
|  | ||||
| # These are the rpms that should be built for each kind of xcat build | ||||
| ALLBUILD="perl-xCAT xCAT-client xCAT-server xCAT-test xCAT-buildkit xCAT xCATsn xCAT-genesis-scripts xCAT-SoftLayer xCAT-vlan xCAT-confluent xCAT-probe xCAT-csm" | ||||
| ALLBUILD="perl-xCAT xCAT-client xCAT-server xCAT-test xCAT-buildkit xCAT xCATsn xCAT-genesis-scripts xCAT-SoftLayer xCAT-vlan xCAT-confluent xCAT-probe xCAT-csm xCAT-openbmc-py" | ||||
| @@ -96,6 +82,26 @@ if [ "$VERBOSE" = "1" -o "$VERBOSE" = "yes" ]; then | ||||
|     VERBOSEMODE=1 | ||||
| fi | ||||
|  | ||||
| # For users to build from source code, simply run ./buildcore.sh | ||||
| #  1. Do not sign RPM by default | ||||
| #  2. Build all packages by default | ||||
| #  3. Do not upload to SourceForge by default | ||||
|  | ||||
| if [ -z "$RPMSIGN" ] && [ -z "$GPGSIGN" ]; then | ||||
|     RPMSIGN=0 | ||||
| elif [ -n "$GPGSIGN" ]; then # use GPGSIGN in first | ||||
|     RPMSIGN=$GPGSIGN | ||||
| fi | ||||
| if [ -z "$RPMSIGN" -o "$RPMSIGN" != "1" ]; then | ||||
|     RPMSIGN=0 | ||||
| fi | ||||
| if [ -z "$BUILDALL" ]; then | ||||
|     BUILDALL=1 | ||||
| fi | ||||
| if [ -z "$UP" ]; then | ||||
|     UP=0 | ||||
| fi | ||||
|  | ||||
| # Find where this script is located to set some build variables | ||||
| cd `dirname $0` | ||||
| # strip the /src/xcat-core from the end of the dir to get the next dir up and use as the release | ||||
| @@ -123,9 +129,16 @@ fi | ||||
|  | ||||
| # for the git case, query the current branch and set REL (changing master to devel if necessary) | ||||
| function setbranch { | ||||
|     REL=`git name-rev --name-only HEAD` | ||||
|     if [ "$REL" = "master" ]; then | ||||
|     # Get the current branch name | ||||
|     branch=`git rev-parse --abbrev-ref HEAD` | ||||
|     if [ "$branch" = "master" ]; then | ||||
|         REL="devel" | ||||
|     elif [ "$branch" = "HEAD" ]; then | ||||
|         # Special handling when in a 'detached HEAD' state | ||||
|         branch=`git describe --abbrev=0 HEAD` | ||||
|         [[ -n "$branch" ]] && REL=`echo $branch|cut -d. -f 1,2` | ||||
|     else | ||||
|         REL=$branch | ||||
|     fi | ||||
| } | ||||
|  | ||||
| @@ -159,15 +172,16 @@ else | ||||
| fi | ||||
|  | ||||
| XCATCORE="xcat-core"        # core-snap is a sym link to xcat-core | ||||
| SRCD=core-snap-srpms | ||||
|  | ||||
| if [ "$GIT" = "1" ]; then    # using git - need to include REL in the path where we put the built rpms | ||||
|     #DESTDIR=../../$REL$EMBEDDIR/$XCATCORE | ||||
|         DESTDIR=$HOME/xcatbuild/$REL$EMBEDDIR/$XCATCORE | ||||
|     [ -z "$DEST" ] && DESTDIR=$HOME/xcatbuild/$REL$EMBEDDIR/$XCATCORE \ | ||||
|                    || DESTDIR=$DEST/$REL$EMBEDDIR/$XCATCORE | ||||
| else | ||||
|     #DESTDIR=../..$EMBEDDIR/$XCATCORE | ||||
|         DESTDIR=$HOME/xcatbuild/..$EMBEDDIR/$XCATCORE | ||||
|     [ -z "$DEST" ] && DESTDIR=$HOME/xcatbuild/..$EMBEDDIR/$XCATCORE \ | ||||
|                    || DESTDIR=$DEST/xcatbuild/..$EMBEDDIR/$XCATCORE | ||||
| fi | ||||
| SRCD=core-snap-srpms | ||||
|  | ||||
| # currently aix builds ppc rpms, but someday it should build noarch | ||||
| if [ "$OSNAME" = "AIX" ]; then | ||||
| @@ -437,7 +451,7 @@ fi | ||||
|  | ||||
| # get gpg keys in place | ||||
| if [ "$OSNAME" != "AIX" ]; then | ||||
|     if [ -z "$RPMSIGN" -o "$RPMSIGN" == "1" ]; then | ||||
|     if [ "$RPMSIGN" == "1" ]; then | ||||
|         mkdir -p $HOME/.gnupg | ||||
|         for i in pubring.gpg secring.gpg trustdb.gpg; do | ||||
|             if [ ! -f $HOME/.gnupg/$i ] || | ||||
| @@ -453,17 +467,20 @@ if [ "$OSNAME" != "AIX" ]; then | ||||
|             echo '%_signature gpg' >> $MACROS | ||||
|         fi | ||||
|         if ! $GREP '%_gpg_name' $MACROS 2>/dev/null; then | ||||
|             echo '%_gpg_name xCAT Security Key' >> $MACROS | ||||
|             echo '%_gpg_name xCAT Automatic Signing Key' >> $MACROS | ||||
|         fi | ||||
|         echo "Signing RPMs..." | ||||
|         build-utils/rpmsign.exp `find $DESTDIR -type f -name '*.rpm'` | grep -v -E '(already contains identical signature|was already signed|rpm --quiet --resign|WARNING: standard input reopened)' | ||||
|         build-utils/rpmsign.exp $SRCDIR/*rpm | grep -v -E '(already contains identical signature|was already signed|rpm --quiet --resign|WARNING: standard input reopened)' | ||||
|         createrepo --checksum sha $DESTDIR            # specifying checksum so the repo will work on rhel5 | ||||
|         createrepo --checksum sha $SRCDIR | ||||
|         # RHEL5 is archaic. Use the default hash algorithm to do the checksum. | ||||
|         # Which is SHA-256 on RHEL6. | ||||
|         createrepo $DESTDIR | ||||
|         createrepo $SRCDIR | ||||
|         rm -f $SRCDIR/repodata/repomd.xml.asc | ||||
|         rm -f $DESTDIR/repodata/repomd.xml.asc | ||||
|         gpg -a --detach-sign $DESTDIR/repodata/repomd.xml | ||||
|         gpg -a --detach-sign $SRCDIR/repodata/repomd.xml | ||||
|         # Use the xCAT Automatic Signing Key to do the signing | ||||
|         gpg -a --detach-sign --default-key 5619700D $DESTDIR/repodata/repomd.xml | ||||
|         gpg -a --detach-sign --default-key 5619700D $SRCDIR/repodata/repomd.xml | ||||
|         if [ ! -f $DESTDIR/repodata/repomd.xml.key ]; then | ||||
|             ${WGET_CMD} -q -P $DESTDIR/repodata $GSA/keys/repomd.xml.key | ||||
|         fi | ||||
| @@ -471,8 +488,8 @@ if [ "$OSNAME" != "AIX" ]; then | ||||
|             ${WGET_CMD} -P $SRCDIR/repodata $GSA/keys/repomd.xml.key | ||||
|         fi | ||||
|     else | ||||
|         createrepo --checksum sha $DESTDIR | ||||
|         createrepo --checksum sha $SRCDIR | ||||
|         createrepo $DESTDIR | ||||
|         createrepo $SRCDIR | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| @@ -574,6 +591,13 @@ fi | ||||
| chgrp $SYSGRP $TARNAME | ||||
| chmod g+w $TARNAME | ||||
|  | ||||
| if [ -n "$DEST" ]; then | ||||
|     ln -sf $(basename `pwd`)/$TARNAME ../$TARNAME | ||||
|     if [ $? != 0 ]; then | ||||
|         echo "ERROR: Failed to make symbol link $DEST/$TARNAME" | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| # Decide whether to upload or not | ||||
| if [ -n "$UP" ] && [ "$UP" == 0 ]; then | ||||
|     exit 0; | ||||
|   | ||||
							
								
								
									
										32
									
								
								builddep.sh
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								builddep.sh
									
									
									
									
									
								
							| @@ -3,21 +3,21 @@ | ||||
| # | ||||
| # Package up all the xCAT open source dependencies | ||||
| # - creating the yum repos | ||||
| # - tar up the deps package  | ||||
| # - tar up the deps package | ||||
| # | ||||
| # This script assumes that the individual rpms have already been compiled | ||||
| # for the relevant architectures from the src & spec files in git. | ||||
| # | ||||
| # Dependencies: | ||||
| # - createrepo command needs to be present on the build machine  | ||||
| # - createrepo command needs to be present on the build machine | ||||
| # | ||||
| # Usage:  builddep.sh [attr=value attr=value ...] | ||||
| #       DESTDIR=<dir> - the dir to place the dep tarball in.  The default is ../../../xcat-dep,  | ||||
| #       DESTDIR=<dir> - the dir to place the dep tarball in.  The default is ../../../xcat-dep, | ||||
| #                       relative to where this script is located. | ||||
| #       UP=0 or UP=1  - override the default upload behavior  | ||||
| #       FRSYUM=0      - put the directory of individual rpms in the project web area instead  | ||||
| #       UP=0 or UP=1  - override the default upload behavior | ||||
| #       FRSYUM=0      - put the directory of individual rpms in the project web area instead | ||||
| #                       of the FRS area. | ||||
| #       VERBOSE=1     - Set to 1 to see more VERBOSE output  | ||||
| #       VERBOSE=1     - Set to 1 to see more VERBOSE output | ||||
|  | ||||
| # you can change this if you need to | ||||
| USER=xcat | ||||
| @@ -45,7 +45,7 @@ fi | ||||
|  | ||||
| if [ ! -d $GSA ]; then | ||||
| 	echo "ERROR: This script is intended to be used by xCAT development..." | ||||
| 	echo "ERROR: The GSA directory ($GSA) directory does not appear to be mounted, cannot continue!"  | ||||
| 	echo "ERROR: The GSA directory ($GSA) directory does not appear to be mounted, cannot continue!" | ||||
| 	exit 1 | ||||
| fi | ||||
|  | ||||
| @@ -73,7 +73,7 @@ XCATCOREDIR=`/bin/pwd` | ||||
| if [ -z "$DESTDIR" ]; then | ||||
| 	# This is really a hack here because it depends on the build | ||||
| 	# environment structure.  However, it's not expected that | ||||
| 	# users are building the xcat-dep packages  | ||||
| 	# users are building the xcat-dep packages | ||||
| 	DESTDIR=../../xcat-dep | ||||
| fi | ||||
|  | ||||
| @@ -129,7 +129,7 @@ if [ "$OSNAME" != "AIX" ]; then | ||||
| 	echo "===> Modifying the xcat-dep.repo files to point to the correct location..." | ||||
| 	# 10/01/2015 - vkhu | ||||
| 	# The URLs have been updated in GSA, this section is not needed at the moment | ||||
| 	#  | ||||
| 	# | ||||
| 	#if [ "$FRSYUM" != 0 ]; then | ||||
| 	#	newurl="$YUMREPOURL2" | ||||
| 	#	oldurl="$YUMREPOURL1" | ||||
| @@ -218,12 +218,12 @@ fi | ||||
|  | ||||
| # Get the permissions and group correct | ||||
| if [ "$OSNAME" == "AIX" ]; then | ||||
| 	# AIX  | ||||
| 	# AIX | ||||
| 	SYSGRP=system | ||||
| 	YUM=aix | ||||
| 	FRSDIR='2.x_AIX' | ||||
| else | ||||
| 	# Linux  | ||||
| 	# Linux | ||||
| 	SYSGRP=root | ||||
| 	YUM=yum/devel | ||||
| 	FRSDIR='2.x_Linux' | ||||
| @@ -232,9 +232,9 @@ chgrp -R -h $SYSGRP * | ||||
| chmod -R g+w * | ||||
|  | ||||
| echo "===> Building the tarball..." | ||||
| #  | ||||
| # | ||||
| # Want to stay above xcat-dep so we can rsync the whole directory | ||||
| # DO NOT CHANGE DIRECTORY AFTER THIS POINT!!  | ||||
| # DO NOT CHANGE DIRECTORY AFTER THIS POINT!! | ||||
| # | ||||
| cd .. | ||||
| pwd | ||||
| @@ -254,12 +254,12 @@ else | ||||
| 	tar $verbosetar -jcf $DFNAME xcat-dep | ||||
| fi | ||||
|  | ||||
| if [[ ${UP} -eq 0 ]]; then  | ||||
| if [[ ${UP} -eq 0 ]]; then | ||||
| 	echo "Upload not being done, set UP=1 to upload to xcat.org" | ||||
| 	exit 0; | ||||
| fi | ||||
|  | ||||
| # Upload the directory structure to xcat.org yum area (xcat/repos/yum).  | ||||
| # Upload the directory structure to xcat.org yum area (xcat/repos/yum). | ||||
| if [ "$FRSYUM" != 0 ]; then | ||||
| 	links="-L"	# FRS does not support rsyncing sym links | ||||
| else | ||||
| @@ -284,7 +284,7 @@ echo "Uploading README to $FRS/xcat-dep/$FRSDIR/ ..." | ||||
| while [ $((i+=1)) -le 5 ] && ! rsync -v README  $USER@$TARGET_MACHINE:$FRS/xcat-dep/$FRSDIR/ | ||||
| do : ; done | ||||
|  | ||||
| # For some reason the README is not updated  | ||||
| # For some reason the README is not updated | ||||
| echo "Uploading README to $YUMDIR/$YUM/ ..." | ||||
| while [ $((i+=1)) -le 5 ] && ! rsync -v README  $USER@$TARGET_MACHINE:$YUMDIR/$YUM/ | ||||
| do : ; done | ||||
|   | ||||
| @@ -35,7 +35,7 @@ ls $CURDIR/makerpm | ||||
|  | ||||
| if [ $? -gt 0 ]; then | ||||
|                 echo "Error:no repo exist, exit 1." | ||||
|                 exit 1  | ||||
|                 exit 1 | ||||
| fi | ||||
|  | ||||
| # Get a lock, so can not do 2 builds at once | ||||
| @@ -77,8 +77,8 @@ echo "This is an Ubuntu system" | ||||
|      fi | ||||
|      cd - | ||||
|      mv ${rpmname_low}* $CURDIR/build | ||||
|   | ||||
|  done  | ||||
|  | ||||
|  done | ||||
|      #delete all files except  .deb file | ||||
|      find $CURDIR/build/* ! -name *.deb | xargs rm -f | ||||
|  | ||||
| @@ -97,31 +97,31 @@ echo "This is an $OSNAME system" | ||||
|      fi | ||||
|  | ||||
|      mkdir -p $CURDIR/build/ | ||||
|    | ||||
|  | ||||
|    #always build perl-xCAT | ||||
|    $CURDIR/makerpm  perl-xCAT   | ||||
|   | ||||
|    $CURDIR/makerpm  perl-xCAT | ||||
|  | ||||
|  | ||||
|    # Build the rest of the noarch rpms | ||||
|    for rpmname in xCAT-client xCAT-server xCAT-IBMhpc xCAT-rmc xCAT-test xCAT-buildkit xCAT-vlan; do | ||||
|         if [ "$OSNAME" = "AIX" -a "$rpmname" = "xCAT-buildkit" ]; then continue; fi      | ||||
|         if [ "$OSNAME" = "AIX" -a "$rpmname" = "xCAT-buildkit" ]; then continue; fi | ||||
|         $CURDIR/makerpm $rpmname | ||||
|    done | ||||
|    | ||||
|  | ||||
|   #build xCAT-genesis-scripts if it is x86_64 platform | ||||
|   ARCH=$(uname -p) | ||||
|   if [ "$ARCH" = "x86_64" ]; then  | ||||
|        $CURDIR/makerpm xCAT-genesis-scripts x86_64   | ||||
|   if [ "$ARCH" = "x86_64" ]; then | ||||
|        $CURDIR/makerpm xCAT-genesis-scripts x86_64 | ||||
|   else | ||||
|        $CURDIR/makerpm xCAT-genesis-scripts ppc64 | ||||
|   fi | ||||
|  | ||||
|    | ||||
|  | ||||
|   # Build the xCAT and xCATsn rpms for all platforms | ||||
|   for rpmname in xCAT xCATsn; do | ||||
|                 if [ "$OSNAME" = "AIX" ]; then | ||||
|                         $CURDIR/makerpm $rpmname | ||||
|                         if [ $? -ne 0 ]; then FAILEDRPMS="$FAILEDRPMS $rpmname"; fi  | ||||
|                         if [ $? -ne 0 ]; then FAILEDRPMS="$FAILEDRPMS $rpmname"; fi | ||||
|                 else | ||||
|                         for arch in x86_64 ppc64 s390x; do | ||||
|                                 $CURDIR/makerpm $rpmname $arch | ||||
| @@ -133,7 +133,7 @@ echo "This is an $OSNAME system" | ||||
|   if [ "$OS" = "SUSE" ]; then | ||||
|       cp /usr/src/packages/RPMS/noarch/* $CURDIR/build/ | ||||
|       cp /usr/src/packages/RPMS/x86_64/* $CURDIR/build/ | ||||
|       cp /usr/src/packages/RPMS/ppc64/* $CURDIR/build/  | ||||
|       cp /usr/src/packages/RPMS/ppc64/* $CURDIR/build/ | ||||
|   else | ||||
|       cp /root/rpmbuild/RPMS/noarch/* $CURDIR/build/ | ||||
|       cp /root/rpmbuild/RPMS/x86_64/* $CURDIR/build/ | ||||
| @@ -158,7 +158,7 @@ EOF | ||||
|      rm -f /etc/zypp/repos.d/xcat-core.repo | ||||
|      zypper ar file://$CURDIR/build xcat-core | ||||
|   fi | ||||
|     | ||||
|  | ||||
| fi | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -20,15 +20,15 @@ parser.add_option("--prefix", dest="PREFIX", help="Specify the location of the P | ||||
| POD2RST="pod2rst" | ||||
|  | ||||
| def cmd_exists(cmd): | ||||
|     return subprocess.call("type " + cmd, shell=True,  | ||||
|     return subprocess.call("type " + cmd, shell=True, | ||||
|         stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 | ||||
|  | ||||
| prefix_path = None | ||||
| prefix_lib_path = None | ||||
|  | ||||
| if options.PREFIX:  | ||||
| if options.PREFIX: | ||||
|     if '~' in options.PREFIX: | ||||
|         # else assume full path is provided  | ||||
|         # else assume full path is provided | ||||
|         prefix_path = os.path.expanduser(options.PREFIX) | ||||
|     else: | ||||
|         prefix_path = options.PREFIX | ||||
| @@ -39,13 +39,13 @@ if options.PREFIX: | ||||
|         sys.exit(1) | ||||
|  | ||||
|     prefix_lib_path = "%s/lib" %(prefix_path) | ||||
|     if not os.path.isdir(prefix_lib_path):  | ||||
|     if not os.path.isdir(prefix_lib_path): | ||||
|         prefix_lib_path = "%s/lib64" %(prefix_path) | ||||
|         if not os.path.isdir(prefix_lib_path): | ||||
|             print "ERROR, Cannot find the Perl lib directory in %s/lib or %s/lib64" %(prefix_path, prefix_path) | ||||
|             sys.exit(1) | ||||
|  | ||||
| else:  | ||||
| else: | ||||
|     if not cmd_exists(POD2RST): | ||||
|         print "ERROR, %s requires pod2rst to continue!" %(os.path.basename(__file__)) | ||||
|         parser.print_help() | ||||
| @@ -57,13 +57,13 @@ MANPAGE_DEST="./docs/source/guides/admin-guides/references/man" | ||||
|  | ||||
| # | ||||
| # add the following to delete the generate files before creating them | ||||
| # essentially this allows us to remove man pages and they will be  | ||||
| # essentially this allows us to remove man pages and they will be | ||||
| # removed in the generation | ||||
| print "Cleaning up the generated man pages in %s" %(MANPAGE_DEST) | ||||
| allfiles = glob("%s*/*.rst" %(MANPAGE_DEST)) | ||||
| for d in allfiles:  | ||||
|     # Skip over the index.rst file  | ||||
|     if not "index.rst" in d:  | ||||
| for d in allfiles: | ||||
|     # Skip over the index.rst file | ||||
|     if not "index.rst" in d: | ||||
|         print "Removing file %s" %(d) | ||||
|         os.remove(d) | ||||
|  | ||||
| @@ -93,18 +93,18 @@ def fix_double_dash(rst_file): | ||||
|     os.system(sed_cmd) | ||||
|     #remove intermediate .sed1 file | ||||
|     rm_sed1file_cmd = "rm %s.sed1" %(rst_file) | ||||
|     os.system(rm_sed1file_cmd)    | ||||
|     os.system(rm_sed1file_cmd) | ||||
|  | ||||
| build_db_man_pages() | ||||
|  | ||||
| # List the xCAT component directory which contain pod pages | ||||
| COMPONENTS = ['xCAT-SoftLayer', 'xCAT-test', 'xCAT-client', 'xCAT-vlan', 'perl-xCAT', 'xCAT-buildkit'] | ||||
|  | ||||
| for component in COMPONENTS:  | ||||
| for component in COMPONENTS: | ||||
|     for root,dirs,files in os.walk("%s" %(component)): | ||||
|  | ||||
|         for file in files: | ||||
|             # only interested in .pod files  | ||||
|             # only interested in .pod files | ||||
|             if file.endswith(".pod"): | ||||
|                 pod_input = os.path.join(root,file) | ||||
|  | ||||
| @@ -130,7 +130,7 @@ for component in COMPONENTS: | ||||
|                     cmd = "perl -I %s/share/perl5 %s/bin/%s " %(prefix_path, prefix_path, POD2RST) | ||||
|  | ||||
|                 cmd += " --infile=%s --outfile=%s --title=%s.%s" %(pod_input, rst_output, title, man_ver) | ||||
|                 # print cmd  | ||||
|                 # print cmd | ||||
|                 os.system(cmd) | ||||
| 		if man_ver == '1' or man_ver == '8': | ||||
|                     fix_vertical_bar(rst_output) | ||||
|   | ||||
| @@ -10,11 +10,11 @@ The documentation project is written in restructured text (.rst) using Sphinx an | ||||
|  | ||||
| * Using pip, install or update sphinx (See: http://pip.readthedocs.org/) | ||||
|    ``` | ||||
|     pip install sphinx   | ||||
|     pip install sphinx | ||||
|    ``` | ||||
|    or | ||||
|    ``` | ||||
|     pip install sphinx --upgrade  | ||||
|     pip install sphinx --upgrade | ||||
|    ``` | ||||
|  | ||||
| * Using pip, install ReadTheDocs theme | ||||
|   | ||||
| @@ -5,4 +5,4 @@ Questions & Answers | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|  | ||||
|    makehosts.rst | ||||
|    makehosts.rst | ||||
|   | ||||
| @@ -67,15 +67,15 @@ Q: How to configure aliases? | ||||
| There are 3 methods to configure aliases: | ||||
|  | ||||
| #. Use ``hostnames`` in ``hosts`` table to configure aliases for the installnic. | ||||
| #. If you want to use script ``confignetwork`` to configure secondary NICs, suggest to use ``aliases`` in ``nics`` table to configure aliases.  Refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>`  | ||||
| #. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs and you don't want to use the script ``confignetwork`` to configure these NICs, suggest to use ``otherinterfaces`` in ``hosts`` table to configure aliases.  Refer to following example:  | ||||
|         | ||||
| #. If you want to use script ``confignetwork`` to configure secondary NICs, suggest to use ``aliases`` in ``nics`` table to configure aliases.  Refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>` | ||||
| #. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs and you don't want to use the script ``confignetwork`` to configure these NICs, suggest to use ``otherinterfaces`` in ``hosts`` table to configure aliases.  Refer to following example: | ||||
|  | ||||
|     * If you want to add ``node1-hd`` ``20.1.1.1`` in ``hosts`` table, and don't use ``confignetwork`` to configure it, you can add ``otherinterfaces`` like this: :: | ||||
|  | ||||
|         chdef node1 otherinterfaces="node1-hd:20.1.1.1"  | ||||
|         chdef node1 otherinterfaces="node1-hd:20.1.1.1" | ||||
|  | ||||
|     * After executing ``makehosts -n``, you can get records in ``/etc/hosts`` like following: :: | ||||
|      | ||||
|  | ||||
|         20.1.1.1 node1-hd | ||||
|  | ||||
| **Note**: If suffixes or aliases for the same IP are configured in both the ``hosts`` table and the ``nics`` table, they will conflict. ``makehosts`` will use the values from the ``nics`` table, which overwrite those from the ``hosts`` table when creating ``/etc/hosts`` records. | ||||
| @@ -90,7 +90,7 @@ You can follow the best practice example. | ||||
|     * There are 2 networks in different domains: ``mgtnetwork`` and ``pubnetwork`` | ||||
|     * ``mgtnetwork`` is xCAT management network | ||||
|     * Generate 2 records with the same hostname in ``/etc/hosts``, like: :: | ||||
|     | ||||
|  | ||||
|         10.5.106.101 node1.cluster.com | ||||
|         192.168.20.101 node1.public.com | ||||
|  | ||||
| @@ -101,11 +101,11 @@ You can follow the best practice example. | ||||
|     #. Add networks entry in ``networks`` table: :: | ||||
|  | ||||
|         chdef -t network mgtnetwork net=10.0.0.0 mask=255.0.0.0 domain=cluster.com | ||||
|         chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com  | ||||
|       | ||||
|         chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com | ||||
|  | ||||
|     #. Create ``node1`` with ``ip=10.5.106.101``, xCAT can manage and install this node: :: | ||||
|     | ||||
|         chdef node1 ip=10.5.106.101 groups=all  | ||||
|  | ||||
|         chdef node1 ip=10.5.106.101 groups=all | ||||
|  | ||||
|     #. Create ``node1-pub`` with ``ip=192.168.30.101``, this node is only used to generate ``/etc/hosts`` records for public network, can use ``_unmanaged`` group name to label it: :: | ||||
|  | ||||
| @@ -114,9 +114,9 @@ You can follow the best practice example. | ||||
|     #. Execute ``makehosts -n`` to generate ``/etc/hosts`` records: :: | ||||
|  | ||||
|         makehosts -n | ||||
|      | ||||
|  | ||||
|     #. Check results in ``/etc/hosts``: :: | ||||
|       | ||||
|  | ||||
|         10.5.106.101 node1 node1.cluster.com | ||||
|         192.168.30.101 node1-pub node1.public.com | ||||
|  | ||||
|   | ||||
| @@ -66,15 +66,15 @@ Q: How to configure aliases? | ||||
| There are 3 methods to configure aliases: | ||||
|  | ||||
| #. Use ``hostnames`` in ``hosts`` table to configure aliases for the installnic. | ||||
| #. If you want to use script ``confignetwork`` to configure secondary NICs, suggest to use ``aliases`` in ``nics`` table to configure aliases, you can refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>`  | ||||
| #. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs, and don't want to use script ``confignetwork`` to configure these NICs, suggest to use ``otherinterfaces`` in ``hosts`` table to configure aliases. You can refer to following example:  | ||||
|         | ||||
| #. If you want to use script ``confignetwork`` to configure secondary NICs, suggest to use ``aliases`` in ``nics`` table to configure aliases, you can refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>` | ||||
| #. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs, and don't want to use script ``confignetwork`` to configure these NICs, suggest to use ``otherinterfaces`` in ``hosts`` table to configure aliases. You can refer to following example: | ||||
|  | ||||
|     * If you want to add ``node1-hd`` ``20.1.1.1`` in ``hosts`` table, and don't use ``confignetwork`` to configure it, you can add ``otherinterfaces`` like this: :: | ||||
|  | ||||
|         chdef node1 otherinterfaces="node1-hd:20.1.1.1"  | ||||
|         chdef node1 otherinterfaces="node1-hd:20.1.1.1" | ||||
|  | ||||
|     * After executing ``makehosts -n``, you can get records in ``/etc/hosts`` like following: :: | ||||
|      | ||||
|  | ||||
|         20.1.1.1 node1-hd | ||||
|  | ||||
| **Note**: If suffixes or aliases for the same IP are configured in both the ``hosts`` table and the ``nics`` table, they will conflict. ``makehosts`` will use the values from the ``nics`` table, which overwrite those from the ``hosts`` table when creating ``/etc/hosts`` records. | ||||
| @@ -89,7 +89,7 @@ You can follow the best practice example. | ||||
|     * There are 2 networks in different domains: ``mgtnetwork`` and ``pubnetwork`` | ||||
|     * ``mgtnetwork`` is xCAT management network | ||||
|     * Generate 2 records with the same hostname in ``/etc/hosts``, like: :: | ||||
|     | ||||
|  | ||||
|         10.5.106.101 node1.cluster.com | ||||
|         192.168.20.101 node1.public.com | ||||
|  | ||||
| @@ -100,11 +100,11 @@ You can follow the best practice example. | ||||
|     #. Add networks entry in ``networks`` table: :: | ||||
|  | ||||
|         chdef -t network mgtnetwork net=10.0.0.0 mask=255.0.0.0 domain=cluster.com | ||||
|         chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com  | ||||
|       | ||||
|         chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com | ||||
|  | ||||
|     #. Create ``node1`` with ``ip=10.5.106.101``, xCAT can manage and install this node: :: | ||||
|     | ||||
|         chdef node1 ip=10.5.106.101 groups=all  | ||||
|  | ||||
|         chdef node1 ip=10.5.106.101 groups=all | ||||
|  | ||||
|     #. Create ``node1-pub`` with ``ip=192.168.30.101``, this node is only used to generate ``/etc/hosts`` records for public network, can use ``_unmanaged`` group name to label it: :: | ||||
|  | ||||
| @@ -113,9 +113,9 @@ You can follow the best practice example. | ||||
|     #. Execute ``makehosts -n`` to generate ``/etc/hosts`` records: :: | ||||
|  | ||||
|         makehosts -n | ||||
|      | ||||
|  | ||||
|     #. Check results in ``/etc/hosts``: :: | ||||
|       | ||||
|  | ||||
|         10.5.106.101 node1 node1.cluster.com | ||||
|         192.168.30.101 node1-pub node1.public.com | ||||
|  | ||||
|   | ||||
| @@ -7,8 +7,8 @@ The chain table (``tabdump chain``) is an xCAT database table that holds the cha | ||||
|  * currchain | ||||
|  * chain | ||||
|  | ||||
| To see how these three attributes are used, refer to the picture:  | ||||
| To see how these three attributes are used, refer to the picture: | ||||
|  | ||||
| .. image:: chain_tasks_logic.png | ||||
|  | ||||
| .. image:: chain_tasks_logic.png  | ||||
|   | ||||
|  | ||||
|   | ||||
| @@ -18,14 +18,14 @@ Currently only the ``bmcsetup`` command is officially supplied by xCAT to run to | ||||
|     runimage=<URL> | ||||
|  | ||||
| **URL** is a string which can be run by ``wget`` to download the image from the URL. The example could be: :: | ||||
|    | ||||
|  | ||||
|     runimage=http://<IP of xCAT Management Node>/<dir>/image.tgz | ||||
|  | ||||
| The ``image.tgz`` **must** have the following properties: | ||||
|   * Created using the ``tar zcvf`` command | ||||
|   * The tarball must include a ``runme.sh`` script to initiate the execution of the runimage | ||||
|  | ||||
| To create your own image, reference :ref:`creating image for runimage <create_image_for_runimage>`.  | ||||
| To create your own image, reference :ref:`creating image for runimage <create_image_for_runimage>`. | ||||
|  | ||||
| **Tip**: You could try to run ``wget http://<IP of xCAT Management Node>/<dir>/image.tgz`` manually to make sure the path has been set correctly. | ||||
|  | ||||
| @@ -41,5 +41,5 @@ Causes the genesis kernel to create a shell for the administrator to log in and | ||||
|  | ||||
| * standby | ||||
|  | ||||
| Causes the genesis kernel to go into standby and wait for tasks from the chain. ...  | ||||
| Causes the genesis kernel to go into standby and wait for tasks from the chain. ... | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,7 @@ How to prepare a image for ``runimage`` in ``chain`` | ||||
|     * go to the directory and run `tar -zcvf <image> .` | ||||
|  | ||||
| * Example | ||||
|     This example shows how to install a standalone package, a.rpm  | ||||
|     This example shows how to install a standalone package, a.rpm | ||||
|  | ||||
|     * Create the directory for the image: :: | ||||
|  | ||||
| @@ -29,7 +29,7 @@ How to prepare a image for ``runimage`` in ``chain`` | ||||
|  | ||||
|          cat runme.sh | ||||
|          echo "start installing a.rpm" | ||||
|          rpm -ivh a.rpm   | ||||
|          rpm -ivh a.rpm | ||||
|  | ||||
|     * modify the runme.sh script permission: :: | ||||
|  | ||||
|   | ||||
| @@ -12,4 +12,4 @@ The **chain** mechanism is implemented in xCAT genesis system. The genesis is a | ||||
|     chain_tasks.rst | ||||
|     run_tasks_during_discovery.rst | ||||
|     run_tasks_to_configure.rst | ||||
|       | ||||
|  | ||||
|   | ||||
| @@ -4,5 +4,5 @@ Compute Node | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    changing_hostname_ip.rst  | ||||
|    changing_hostname_ip.rst | ||||
|    replace/index.rst | ||||
|   | ||||
| @@ -4,7 +4,7 @@ OpenPOWER Nodes | ||||
|  | ||||
| When compute nodes are physically replaced in the frame, leverage xCAT to re-discover the compute nodes.  The following guide can be used for: | ||||
|  | ||||
|   * IBM OpenPOWER S822LC for HPC  | ||||
|   * IBM OpenPOWER S822LC for HPC | ||||
|  | ||||
|  | ||||
| #. Identify the machine(s) to be replaced: ``frame10cn02``. | ||||
| @@ -15,24 +15,24 @@ When compute nodes are physically replaced in the frame, leverage xCAT to re-dis | ||||
|  | ||||
| #. Set the outgoing machine to ``offline`` and remove attributes of the machine: :: | ||||
|  | ||||
|     nodeset frame10cn02 offline  | ||||
|     nodeset frame10cn02 offline | ||||
|     chdef frame10cn02 mac="" | ||||
|  | ||||
| #. If using **MTMS**-based discovery, fill in the Model-Type and Serial Number for the machine: :: | ||||
|  | ||||
|     chdef frame10cn02 mtm=8335-GTB serial=<NEW SERIAL NUMBER> | ||||
|  | ||||
| #. If using **SWITCH**-based discovery, go on to the next step. The ``switch`` and ``switch-port`` should already be set in the compute node definition.  | ||||
| #. If using **SWITCH**-based discovery, go on to the next step. The ``switch`` and ``switch-port`` should already be set in the compute node definition. | ||||
|  | ||||
|    Node attributes will be replaced during the discovery process (mtm, serial, mac, etc.) | ||||
|  | ||||
| #. Search for the new BMC in the open range: :: | ||||
|  | ||||
|     bmcdiscover --range <IP open range> -w -z  | ||||
|     bmcdiscover --range <IP open range> -w -z | ||||
|  | ||||
| #. When the BMC is found, start the discovery with the following commands: :: | ||||
|  | ||||
|     rsetboot /node-8335.* net | ||||
|     rpower /node-8335.* boot  | ||||
|     rpower /node-8335.* boot | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Cluster Maintenance | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    compute_node/index.rst  | ||||
|    compute_node/index.rst | ||||
|    mgmt_node/index.rst | ||||
|    service_node/index.rst | ||||
|    sw_fw_inventory.rst | ||||
|   | ||||
| @@ -70,7 +70,7 @@ Nothing to do. | ||||
| PostgreSQL | ||||
| ^^^^^^^^^^ | ||||
|  | ||||
| - Edit ``/etc/xcat/cfgloc`` file...  | ||||
| - Edit ``/etc/xcat/cfgloc`` file... | ||||
|  | ||||
|    Replace ``Pg:dbname=xcatdb;host=<old_MN_ip>|xcatadm|xcat20`` with ``Pg:dbname=xcatdb;host=<new_MN_ip>|xcatadm|xcat20``. | ||||
|  | ||||
| @@ -81,7 +81,7 @@ PostgreSQL | ||||
| MySQL | ||||
| ^^^^^ | ||||
|  | ||||
| - Edit ``/etc/xcat/cfgloc``...  | ||||
| - Edit ``/etc/xcat/cfgloc``... | ||||
|     Replace ``mysql:dbname=xcatdb;host=<old_MN_ip>|xcatadmin|xcat20`` with ``mysql:dbname=xcatdb;host=<new_MN_ip>|xcatadmin|xcat20`` | ||||
|  | ||||
| Start the database | ||||
| @@ -151,7 +151,7 @@ For example, the old IP address was "10.6.0.1" | ||||
|  | ||||
|     chdef -t node cn1-cn4 conserver=<new_ip_address> | ||||
|  | ||||
| * Repeat the same process for the other attributes containing the old IP address.  | ||||
| * Repeat the same process for the other attributes containing the old IP address. | ||||
|  | ||||
| Change networks table | ||||
| ^^^^^^^^^^^^^^^^^^^^^ | ||||
| @@ -178,7 +178,7 @@ If the old address still exists in the ``*.csv`` file, you can edit this file, t | ||||
| Generate SSL credentials(optional) | ||||
| ---------------------------------- | ||||
|  | ||||
| Use the following command to generate new SSL credentials: ``xcatconfig -c``.  | ||||
| Use the following command to generate new SSL credentials: ``xcatconfig -c``. | ||||
|  | ||||
| Then update the following in xCAT: | ||||
|  | ||||
|   | ||||
| @@ -4,4 +4,4 @@ Management Node | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    changing_hostname_ip.rst  | ||||
|    changing_hostname_ip.rst | ||||
|   | ||||
| @@ -4,4 +4,4 @@ Service Node | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    changing_hostname_ip.rst  | ||||
|    changing_hostname_ip.rst | ||||
|   | ||||
| @@ -5,7 +5,7 @@ Starting the confetty client | ||||
| As the root user, running ``/opt/confluent/bin/confetty`` will open the confetty prompt :: | ||||
|  | ||||
|       [root@c910f02c05p03 ~]# /opt/confluent/bin/confetty | ||||
|       / ->  | ||||
|       / -> | ||||
|  | ||||
| Creating a non root user | ||||
| ======================== | ||||
| @@ -26,19 +26,19 @@ It's recommenteed to create a non root user to use to connect to confetty | ||||
|       password="********" | ||||
|  | ||||
|  | ||||
| Connecting to a remote server  | ||||
| Connecting to a remote server | ||||
| ============================= | ||||
|  | ||||
|  | ||||
| In order to do remote sessions, keys must first be added to ``/etc/confluent`` | ||||
|  | ||||
| * /etc/confluent/privkey.pem - private key  | ||||
| * /etc/confluent/privkey.pem - private key | ||||
| * /etc/confluent/srvcert.pem - server cert | ||||
|  | ||||
| If you want to use the xCAT keys, you can simply copy them into ``/etc/confluent`` :: | ||||
|  | ||||
|     cp /etc/xcat/cert/server-key.pem /etc/confluent/privkey.pem | ||||
|     cp /etc/xcat/cert/server-cert.pem /etc/confluent/srvcert.pem  | ||||
|     cp /etc/xcat/cert/server-cert.pem /etc/confluent/srvcert.pem | ||||
|  | ||||
| The user and password may alternatively be provided via environment variables: :: | ||||
|  | ||||
| @@ -55,4 +55,4 @@ If you want to run a confluent command against another host, could set the CONFL | ||||
|     CONFLUENT_HOST=<remote_ip> | ||||
|     export CONFLUENT_HOST | ||||
|  | ||||
|   | ||||
|  | ||||
|   | ||||
| @@ -4,4 +4,4 @@ confluent-client | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    confluent_client.rst  | ||||
|    confluent_client.rst | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| Confluent  | ||||
| Confluent | ||||
| ========= | ||||
|  | ||||
| Confluent is a new codebase with a few goals in mind: | ||||
|  | ||||
| * Augment xCAT 2.X series  | ||||
| * Augment xCAT 2.X series | ||||
| * Potentially serve in place of xCAT-server for the next generation of xCAT | ||||
|  | ||||
| **Disclaimer:** *Confluent code in conjunction with xCAT 2.X is currently BETA, use at your own risk* | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
|  | ||||
| Getting Started  | ||||
| Getting Started | ||||
| =============== | ||||
|  | ||||
| Confluent is intended to be used in conjunction with xCAT.  | ||||
| Confluent is intended to be used in conjunction with xCAT. | ||||
| The following documentation assumes that xCAT is already installed and configured on the management node. | ||||
|  | ||||
| Download confluent | ||||
| @@ -20,7 +20,7 @@ To build from source, ensure your machine has the correct development packages t | ||||
|         cd confluent/confluent_client ; ./buildrpm ; cd - | ||||
|  | ||||
|  | ||||
| Install  | ||||
| Install | ||||
| ======= | ||||
|  | ||||
| dependency | ||||
| @@ -59,7 +59,7 @@ To start confluent:: | ||||
|     service confluent start | ||||
|  | ||||
| To stop confluent:: | ||||
|     | ||||
|  | ||||
|     service confluent stop | ||||
|  | ||||
| If you want confluent daemon to start automatically at bootup, add confluent service to ``chkconfig``:: | ||||
| @@ -113,11 +113,11 @@ Configure the httpd configuration for confluent-api by creating a ``confluent.co | ||||
|     <Location /confluent-api> | ||||
|             ProxyPass http://10.2.5.3:4005 | ||||
|     </Location> | ||||
|      | ||||
|     # restart httpd   | ||||
|  | ||||
|     # restart httpd | ||||
|     service httpd restart | ||||
|  | ||||
| Now point your browser to: ``http://<server ip>:<port>`` and log in with the non-root user and password created above.  | ||||
| Now point your browser to: ``http://<server ip>:<port>`` and log in with the non-root user and password created above. | ||||
|  | ||||
| Confluent consoles | ||||
| ================== | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| confluent-server  | ||||
| confluent-server | ||||
| ================ | ||||
|  | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    confluent_server.rst  | ||||
|    confluent_server.rst | ||||
|   | ||||
| @@ -36,7 +36,7 @@ Create Configuration File | ||||
| ''''''''''''''''''''''''' | ||||
|  | ||||
| Define configuration file ``docker-registry`` under ``/docker-registry/`` folder as below. :: | ||||
|    | ||||
|  | ||||
|     #!/bin/bash | ||||
|  | ||||
|     docker_command=$1 | ||||
| @@ -84,7 +84,7 @@ Start registry service: :: | ||||
| Method 2: Managing Docker Registry with Compose | ||||
| ``````````````````````````````````````````````` | ||||
|  | ||||
| Docker Compose is a tool for defining and running Docker applications. It can help with setting up a registry.  | ||||
| Docker Compose is a tool for defining and running Docker applications. It can help with setting up a registry. | ||||
|  | ||||
| Install Docker Compose | ||||
| '''''''''''''''''''''' | ||||
| @@ -147,10 +147,10 @@ List Available Images in Registry | ||||
| ````````````````````````````````````` | ||||
| :: | ||||
|  | ||||
|     curl -k https://domainname:5000/v2/_catalog  | ||||
|     curl -k https://domainname:5000/v2/_catalog | ||||
|  | ||||
| Pull Images from Registry | ||||
| `````````````````````````   | ||||
| ````````````````````````` | ||||
| Just use the "tag" image name, which includes the domain name, port, and image name. :: | ||||
|  | ||||
|     docker pull domainname:5000/imagename | ||||
|   | ||||
| @@ -16,7 +16,7 @@ Overview | ||||
|    :align: right | ||||
|  | ||||
|  | ||||
| **Compose** is a native tool shipped by Docker to define and run applications in Docker containers. You use a Compose file to configure your applications/services. Then, using a single command, you create and start all the services from your configuration.   | ||||
| **Compose** is a native tool shipped by Docker to define and run applications in Docker containers. You use a Compose file to configure your applications/services. Then, using a single command, you create and start all the services from your configuration. | ||||
|  | ||||
| By pulling xCAT Docker image and running xCAT Docker image in a container, you get a well-configured xCAT Management Node to start cluster management work, without worrying about the xCAT installation and configuration on different OS and various hardware platforms. | ||||
|  | ||||
| @@ -24,10 +24,10 @@ By pulling xCAT Docker image and running xCAT Docker image in a container, you g | ||||
| xCAT Docker images | ||||
| ------------------ | ||||
|  | ||||
| xCAT ships 2 Docker images for Docker host with different architecture:  | ||||
| xCAT ships 2 Docker images for Docker host with different architecture: | ||||
|  | ||||
| * "xcat/xcat-ubuntu-x86_64": run on x86_64 Docker host | ||||
| * "xcat/xcat-ubuntu-ppc64le": run on ppc64le Docker host  | ||||
| * "xcat/xcat-ubuntu-ppc64le": run on ppc64le Docker host | ||||
|  | ||||
| Each of the xCAT Docker images above has 3 tags corresponding to different xCAT release inside Docker image: | ||||
|  | ||||
| @@ -44,7 +44,7 @@ Run xCAT in Docker | ||||
| Each container running the xCAT Docker image is an xCAT management node; the container connects to the compute nodes and hardware control points in the cluster via "bridge" network on the Docker host. Generally, an xCAT container should connect to 2 types of networks (the 2 types of networks might be one network in some clusters): | ||||
|  | ||||
| * "mgtnet": Management network, the network used by the Management Node to install operating systems and manage the nodes. The Management Node and in-band Network Interface Card (NIC) of the nodes are connected to this network. A bridge "mgtbr" will be created and attached to the network interface facing the compute nodes on Docker host | ||||
| * "svcnet": Service network, the network used by the Management Node to control the nodes using out-of-band management using the Service Processor. A bridge "svcbr" will be created and attached to the network interface facing the hardware control points  | ||||
| * "svcnet": Service network, the network used by the Management Node to control the nodes using out-of-band management using the Service Processor. A bridge "svcbr" will be created and attached to the network interface facing the hardware control points | ||||
|  | ||||
| You are required to determine and specify some necessary information, so that xCAT is well configured and running when the container is started. This includes: | ||||
|  | ||||
| @@ -52,7 +52,7 @@ You are required to determine and specify some necessary information, so that xC | ||||
| * network information: the network configuration of the xCAT container | ||||
| * cluster information: the domain of the cluster | ||||
|  | ||||
| The information can be specified in 2 ways to run xCAT container:  | ||||
| The information can be specified in 2 ways to run xCAT container: | ||||
|  | ||||
| * in options and arguments of docker commands such as ``docker network create`` or ``docker run`` | ||||
| * in the "docker-compose.yml", which contains all the configuration to start xCAT containers with Compose. This is the recommended way to start xCAT container. | ||||
| @@ -66,7 +66,7 @@ When xCAT Docker container is started, you can access it with ``sudo docker atta | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    setup_docker_host.rst  | ||||
|    setup_docker_host.rst | ||||
|    run_xcat_in_docker_compose.rst | ||||
|    run_xcat_in_docker_native.rst | ||||
|  | ||||
| @@ -77,21 +77,21 @@ Work with xCAT | ||||
|  | ||||
| Once xCAT Docker container is running, you can use xCAT with the shell inside the container. Since the ssh service has been enabled on the Docker container startup, you can connect to the container via ssh. The default root password is "cluster". | ||||
|  | ||||
| Once you attach or ssh to the container, you will find that xCAT is running and configured, you can play with xCAT and manage your cluster now.  | ||||
| Once you attach or ssh to the container, you will find that xCAT is running and configured, you can play with xCAT and manage your cluster now. | ||||
|  | ||||
| Currently, xCAT can only generate diskless osimages of Linux distributions with the same OS version and architecture as the xCAT MN. If you need to provision diskless osimages other than ubuntu x86_64 with xCAT running in Docker, you can use ``imgexport`` and ``imgimport`` to import diskless osimages that were generated beforehand. | ||||
|  | ||||
| If you start up the xCAT Docker container by strictly following the steps described in the sections above, without specifying "--dns=IP_ADDRESS...", "--dns-search=DOMAIN...", or "--dns-opt=OPTION..." options, Docker uses the /etc/resolv.conf of the host machine (where the docker daemon runs). If there is any DNS problem inside the container, make sure the DNS server on the Docker host works well. | ||||
|  | ||||
| Save and Restore xCAT data  | ||||
| Save and Restore xCAT data | ||||
| ---------------------------- | ||||
|  | ||||
| According to the policy of Docker, Docker image should only be the service deployment unit, it is not recommended to save data in Docker image. Docker uses "Data Volume" to save persistent data inside container, which can be simply taken as a shared directory between Docker host and Docker container.  | ||||
| According to the policy of Docker, Docker image should only be the service deployment unit, it is not recommended to save data in Docker image. Docker uses "Data Volume" to save persistent data inside container, which can be simply taken as a shared directory between Docker host and Docker container. | ||||
|  | ||||
| For dockerized xCAT, there are 3 volumes recommended to save and restore xCAT user data. | ||||
|  | ||||
| * "/install":        save the osimage resources under "/install" directory  | ||||
| * "/var/log/xcat/":  save xCAT logs  | ||||
| * "/install":        save the osimage resources under "/install" directory | ||||
| * "/var/log/xcat/":  save xCAT logs | ||||
| * "/.dbbackup":      save and restore xCAT DB tables. You can save the xCAT DB tables with ``dumpxCATdb -p /.dbbackup/`` inside container and xCAT will restore the tables on the container start up. | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -3,14 +3,14 @@ Run xCAT in Docker with Compose (Recommended) | ||||
|  | ||||
|  | ||||
| An example configuration in the documentation | ||||
| ---------------------------------------------  | ||||
| --------------------------------------------- | ||||
|  | ||||
| To demonstrate the steps to run xCAT in a Docker container, take a cluster with the following configuration as an example :: | ||||
|  | ||||
|  | ||||
|     The name of the docker container running xCAT: xcatmn  | ||||
|     The name of the docker container running xCAT: xcatmn | ||||
|     The hostname of container xcatmn: xcatmn | ||||
|     The dns domain of the cluster: clusters.com  | ||||
|     The dns domain of the cluster: clusters.com | ||||
|  | ||||
|     The management network object: mgtnet | ||||
|     The network bridge of management network on Docker host: mgtbr | ||||
| @@ -24,7 +24,7 @@ To demonstrate the steps to run xCAT in a Docker container, take a cluster with | ||||
|     The IP address of eno2 on Docker host: 192.168.0.1/8 | ||||
|     The IP address of xCAT container in service network: 192.168.0.101 | ||||
|  | ||||
|   | ||||
|  | ||||
| Install Compose on Docker host | ||||
| ------------------------------ | ||||
|  | ||||
| @@ -34,27 +34,27 @@ Compose v1.7.0 or above should be installed on Docker host: :: | ||||
|     chmod +x /usr/local/bin/docker-compose | ||||
|  | ||||
|  | ||||
| Customize docker-compose file  | ||||
| Customize docker-compose file | ||||
| ----------------------------- | ||||
|  | ||||
| xCAT ships a docker-compose template `docker-compose.yml <https://github.com/immarvin/xcat-docker/blob/master/docker-compose.yml>`_, which is a self-description file including all the configurations to run xCAT in container. You can make up your compose file based on it if you are familiar with `Compose file <https://docs.docker.com/compose/compose-file/>`_ , otherwise, you can simply customize it with the following steps:  | ||||
| xCAT ships a docker-compose template `docker-compose.yml <https://github.com/immarvin/xcat-docker/blob/master/docker-compose.yml>`_, which is a self-description file including all the configurations to run xCAT in container. You can make up your compose file based on it if you are familiar with `Compose file <https://docs.docker.com/compose/compose-file/>`_ , otherwise, you can simply customize it with the following steps: | ||||
|  | ||||
| 1. Specify the xCAT Docker image | ||||
|  | ||||
| :: | ||||
|  | ||||
|     image: [xCAT Docker image name]:[tag]   | ||||
|   | ||||
| specify the name and tag of xCAT Docker image, for example "xcat/xcat-ubuntu-x86_64:2.11"  | ||||
|     image: [xCAT Docker image name]:[tag] | ||||
|  | ||||
| 2. Specify the cluster domain name  | ||||
| specify the name and tag of xCAT Docker image, for example "xcat/xcat-ubuntu-x86_64:2.11" | ||||
|  | ||||
| ::  | ||||
| 2. Specify the cluster domain name | ||||
|  | ||||
| :: | ||||
|  | ||||
|     extra_hosts: | ||||
|        - "xcatmn.[cluster domain name] xcatmn:[Container's IP address in management network]" | ||||
|  | ||||
| specify the cluster domain name,i.e, "site.domain" on xCAT Management Node, for example "clusters.com", and the IP address of xCAT Docker container in the management network, such as "10.5.107.101"  | ||||
| specify the cluster domain name, i.e., "site.domain" on xCAT Management Node, for example "clusters.com", and the IP address of xCAT Docker container in the management network, such as "10.5.107.101" | ||||
|  | ||||
| 3. Specify the IP address of xCAT container in service network and management network | ||||
|  | ||||
| @@ -66,7 +66,7 @@ specify the cluster domain name,i.e, "site.domain" on xCAT Management Node, for | ||||
|         ipv4_address : [Container's IP address in service network] | ||||
|  | ||||
|       mgtnet: | ||||
|         ipv4_address : [Container's IP address in management network]   | ||||
|         ipv4_address : [Container's IP address in management network] | ||||
|  | ||||
| specify the IP address of Docker container in service network and management network. If the "svcnet" is the same as "mgtnet", the 2 "svcnet" lines should be commented out. | ||||
|  | ||||
| @@ -75,30 +75,30 @@ specify the IP address of Docker container in service network and management net | ||||
| :: | ||||
|  | ||||
|     networks: | ||||
|        | ||||
|       #management network, attached to the network interface on Docker host  | ||||
|  | ||||
|       #management network, attached to the network interface on Docker host | ||||
|       #facing the nodes to provision | ||||
|       mgtnet: | ||||
|         driver: "bridge" | ||||
|         driver_opts:  | ||||
|           com.docker.network.bridge.name: "mgtbr"  | ||||
|         ipam:  | ||||
|           config:  | ||||
|         driver_opts: | ||||
|           com.docker.network.bridge.name: "mgtbr" | ||||
|         ipam: | ||||
|           config: | ||||
|             - subnet: [subnet of mgtbr in CIDR] | ||||
|               gateway: [IP address of mgtbr] | ||||
|          | ||||
|  | ||||
|       #service network, attached to the network interface on | ||||
|       #Docker host facing the bmc network | ||||
|       svcnet: | ||||
|         driver: "bridge" | ||||
|         driver_opts:  | ||||
|           com.docker.network.bridge.name: "svcbr"  | ||||
|         ipam:  | ||||
|           config:  | ||||
|         driver_opts: | ||||
|           com.docker.network.bridge.name: "svcbr" | ||||
|         ipam: | ||||
|           config: | ||||
|             - subnet: [subnet of svcbr in CIDR] | ||||
|               gateway: [IP address of svcbr] | ||||
|      | ||||
| specify the network configuration of bridge networks "mgtnet" and "svcnet", the network configuration of the bridge networks should be same as the network interfaces attached to the bridges. The "mgtnet" and "svcnet" might the same network in some cluster, in this case, you can ignore the lines for "svcnet".   | ||||
|  | ||||
| specify the network configuration of bridge networks "mgtnet" and "svcnet", the network configuration of the bridge networks should be same as the network interfaces attached to the bridges. The "mgtnet" and "svcnet" might be the same network in some clusters, in this case, you can ignore the lines for "svcnet". | ||||
|  | ||||
| 5. Specify the Data Volumes for xCAT Docker container | ||||
|  | ||||
| @@ -114,16 +114,16 @@ specify the network configuration of bridge networks "mgtnet" and "svcnet", the | ||||
|       #"dumpxCATdb -p /.dbbackup" should be run manually to save xCAT DB inside container | ||||
|       - [The directory on Docker host mounted to save xCAT DB inside container]:/.dbbackup | ||||
|       #the "/.logs" value is used to keep xCAT logs | ||||
|       #the xCAT logs will be kept if specified  | ||||
|       #the xCAT logs will be kept if specified | ||||
|       - [The directory on Docker host to save xCAT logs inside container]:/var/log/xcat/ | ||||
|  | ||||
| specify the volumes of the xCAT container used to save and restore xCAT data | ||||
|  | ||||
|  | ||||
| Start xCAT Docker container with Compose  | ||||
| Start xCAT Docker container with Compose | ||||
| ---------------------------------------- | ||||
| After the "docker-compose.yml" is ready, the xCAT Docker container can be started with [1]_ :: | ||||
|    | ||||
|  | ||||
|    docker-compose -f "docker-compose.yml" up -d; \ | ||||
|    ifconfig eno1 0.0.0.0; \ | ||||
|    brctl addif mgtbr eno1; \ | ||||
| @@ -139,7 +139,7 @@ To remove the container, you can run :: | ||||
|   ifup eno1 | ||||
|  | ||||
| To update the xCAT Docker image, you can run :: | ||||
|    | ||||
|  | ||||
|   docker-compose -f "docker-compose.yml" pull | ||||
|  | ||||
|  | ||||
| @@ -151,6 +151,6 @@ Known Issues | ||||
| When you start up xCAT Docker container, you might see an error message at the end of the output like :: | ||||
|  | ||||
|   Couldn't connect to Docker daemon at http+unix://var/run/docker.sock - is it running? If it's at a non-standard location, specify the URL with the DOCKER_HOST environment variable. | ||||
|     | ||||
|  | ||||
| You can ignore it; the container is already running. It is a Docker bug `#1214 <https://github.com/docker/compose/issues/1214>`_ | ||||
|     | ||||
|  | ||||
|   | ||||
| @@ -9,7 +9,7 @@ Now xCAT ships xCAT Docker images(x86_64 and ppc64le) on the `DockerHub <https:/ | ||||
|  | ||||
| To pull the latest xCAT Docker image for x86_64, run :: | ||||
|  | ||||
|     sudo docker pull xcat/xcat-ubuntu-x86_64         | ||||
|     sudo docker pull xcat/xcat-ubuntu-x86_64 | ||||
|  | ||||
| On success, you will see the pulled Docker image on Docker host :: | ||||
|  | ||||
| @@ -19,12 +19,12 @@ On success, you will see the pulled Docker image on Docker host :: | ||||
|  | ||||
|  | ||||
| An example configuration in the documentation | ||||
| ---------------------------------------------  | ||||
| --------------------------------------------- | ||||
|  | ||||
| To demonstrate the steps to run xCAT in a Docker container, take a cluster with the following configuration as an example :: | ||||
|  | ||||
|     Docker host: dockerhost1 | ||||
|     The name of the docker container running xCAT: xcatmn  | ||||
|     The name of the docker container running xCAT: xcatmn | ||||
|     The hostname of container xcatmn: xcatmn | ||||
|  | ||||
|     The management network object: mgtnet | ||||
| @@ -33,15 +33,15 @@ To demonstrate the steps to run xCAT in a Docker container, take a cluster with | ||||
|     The IP address of eno1 on Docker host: 10.5.107.1/8 | ||||
|     The IP address of xCAT container in management network: 10.5.107.101 | ||||
|  | ||||
|     The dns domain of the cluster: clusters.com  | ||||
|     The dns domain of the cluster: clusters.com | ||||
|  | ||||
|  | ||||
| Create a customized Docker network on the Docker host | ||||
| ----------------------------------------------------- | ||||
|  | ||||
| **Docker Networks** provide complete isolation for containers, which gives you control over the networks your containers run on. To run xCAT in Docker, you should create a customized bridge network according to the cluster network plan, instead of using the default bridge network created on Docker installation.  | ||||
| **Docker Networks** provide complete isolation for containers, which gives you control over the networks your containers run on. To run xCAT in Docker, you should create a customized bridge network according to the cluster network plan, instead of using the default bridge network created on Docker installation. | ||||
|  | ||||
| As an example, we create a customized bridge network "mgtbr" which is attached to the network interface "eno1" facing the compute nodes and inherits the network configuration of "eno1". Since the commands to create the network will break the network connection on "eno1", you'd better run the commands in one line instead of running them seperatly ::    | ||||
| As an example, we create a customized bridge network "mgtbr" which is attached to the network interface "eno1" facing the compute nodes and inherits the network configuration of "eno1". Since the commands to create the network will break the network connection on "eno1", you'd better run the commands in one line instead of running them separately :: | ||||
|  | ||||
|     sudo docker network create --driver=bridge --gateway=10.5.107.1 --subnet=10.5.107.0/8 -o "com.docker.network.bridge.name"="mgtbr" mgtnet; \ | ||||
|     ifconfig eno1 0.0.0.0; \ | ||||
| @@ -51,7 +51,7 @@ As an example, we create a customized bridge network "mgtbr" which is attached t | ||||
| * ``--driver=bridge`` specify the network driver to be "bridge" | ||||
| * ``--gateway=10.5.107.1`` specify the network gateway to be the IP address of "eno1" on Docker host. which will also be the IP address of network bridge "mgtbr" | ||||
| * ``--subnet=10.5.107.0/8`` specify the subnet in CIDR format to be the subnet of "eno1" | ||||
| * ``com.docker.network.bridge.name"="mgtbr"`` specify the bridge name of management network  | ||||
| * ``-o "com.docker.network.bridge.name"="mgtbr"`` specify the bridge name of management network | ||||
| * ``ifconfig eno1 0.0.0.0`` delete the IP address of "eno1" | ||||
| * ``brctl addif mgtbr eno1`` attach the bridge "mgtbr" to network interface "eno1" | ||||
| * ``ip link set mgtbr up`` change the state of "mgtbr" to UP | ||||
| @@ -68,7 +68,7 @@ Now run the xCAT Docker container with the Docker image "xcat/xcat-ubuntu-x86_64 | ||||
|  | ||||
| * use ``--privileged=true`` to give extended privileges to this container | ||||
| * use ``--hostname`` to specify the hostname of the container, which is available inside the container | ||||
| * use ``--name`` to assign a name to the container, this name can be used to manipulate the container on Docker host  | ||||
| * use ``--name`` to assign a name to the container, this name can be used to manipulate the container on Docker host | ||||
| * use ``--add-host="xcatmn.clusters.com xcatmn:10.5.107.101"`` to write the ``/etc/hosts`` entries of Docker container inside container. Since xCAT uses the FQDN(Fully Qualified Domain Name) to determine the cluster domain on startup, make sure the format to be "<FQDN> <hostname>: <IP Address>", otherwise, you need to set the cluster domain with ``chdef -t site -o clustersite domain="clusters.com"`` inside the container manually | ||||
| * use ``--volume /docker/xcatdata/:/install`` to mount a pre-created "/docker/xcatdata" directory on Docker host to "/install" directory inside container as a data volume. This is optional, it is mandatory if you want to backup and restore xCAT data. | ||||
| * use ``--net=mgtnet`` to connect the container to the Docker network "mgtnet" | ||||
|   | ||||
| @@ -4,14 +4,14 @@ Setup Docker host | ||||
| Install Docker Engine | ||||
| --------------------- | ||||
|  | ||||
| The Docker host to run xCAT Docker image should be a baremental or virtual server with Docker v1.10 or above installed. For the details on system requirements and Docker installation, refer to `Docker Installation Docs <https://docs.docker.com/engine/installation/>`_.  | ||||
| The Docker host to run xCAT Docker image should be a bare metal or virtual server with Docker v1.10 or above installed. For the details on system requirements and Docker installation, refer to `Docker Installation Docs <https://docs.docker.com/engine/installation/>`_. | ||||
|  | ||||
| .. note:: Docker images can only run on Docker hosts with the same architecture.  Since xCAT only ships x86_64 and ppc64le Docker images, running xCAT in Docker requires x86_64 or ppc64le Docker Hosts. | ||||
|  | ||||
| Shutdown the SELinux/Apparmor on Docker host | ||||
| -------------------------------------------- | ||||
|  | ||||
| If the SELinux or Apparmor on Docker host is enabled, the services/applications inside Docker Container might be confined. To run xCAT in Docker container, SELinux and Apparmor on the Docker host must be disabled.  | ||||
| If the SELinux or Apparmor on Docker host is enabled, the services/applications inside Docker Container might be confined. To run xCAT in Docker container, SELinux and Apparmor on the Docker host must be disabled. | ||||
|  | ||||
| SELinux can be disabled with: :: | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,6 @@ Docker | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    dockerized_xcat/dockerized_xcat.rst    | ||||
|    dockerized_xcat/dockerized_xcat.rst | ||||
|    lifecycle_management.rst | ||||
|    docker_registry.rst | ||||
|   | ||||
| @@ -3,9 +3,9 @@ Docker life-cycle management in xCAT | ||||
|  | ||||
| The Docker linux container technology is currently very popular. xCAT can help manage Docker containers. xCAT, as a system management tool has the natural advantage for supporting multiple operating systems, multiple architectures and large scale clusters. | ||||
|  | ||||
| This document describes how to use xCAT for docker management, from Docker Host setup to docker container operations.  | ||||
| This document describes how to use xCAT for docker management, from Docker Host setup to docker container operations. | ||||
|  | ||||
| .. note:: This document was verified with:  | ||||
| .. note:: This document was verified with: | ||||
|              * Docker Version 1.10, 1.11 | ||||
|              * Docker API version 1.22 | ||||
|  | ||||
| @@ -19,7 +19,7 @@ This document describes how to use xCAT for docker management, from Docker Host | ||||
| Setting up Docker Host | ||||
| ---------------------- | ||||
|  | ||||
| The **Docker Host** is the bare metal server or virtual machine where Docker containers can run. It will be called *dockerhost* in the following sections.  | ||||
| The **Docker Host** is the bare metal server or virtual machine where Docker containers can run. It will be called *dockerhost* in the following sections. | ||||
|  | ||||
| The *dockerhost* at a minimum must provide the following: | ||||
|  | ||||
| @@ -28,12 +28,12 @@ The *dockerhost* at a minimum must provide the following: | ||||
|  | ||||
| Preparing osimage for docker host | ||||
| ````````````````````````````````` | ||||
| The osimage represents the image of the Operating System which will be deployed on the dockerhost.  | ||||
| The osimage represents the image of the Operating System which will be deployed on the dockerhost. | ||||
|  | ||||
| Copy files out from DVDs/ISOs and generate   | ||||
| Copy files out from DVDs/ISOs and generate | ||||
| """""""""""""""""""""""""""""""""""""""""" | ||||
| **[ubuntu x86_64]** ::   | ||||
|     | ||||
| **[ubuntu x86_64]** :: | ||||
|  | ||||
|   copycds ubuntu-xxx-server-amd64.iso | ||||
|  | ||||
| **[ubuntu16.04 ppc64el]** :: | ||||
| @@ -52,31 +52,31 @@ The pkglist file should contain the following: :: | ||||
|  nfs-common | ||||
|  snmpd | ||||
|  bridge-utils | ||||
|   | ||||
| The otherpkglist file should contain the following:  | ||||
|  | ||||
| **[ubuntu x86_64]** ::   | ||||
| The otherpkglist file should contain the following: | ||||
|  | ||||
| **[ubuntu x86_64]** :: | ||||
|  | ||||
|  # cat /install/custom/ubuntu/ubuntu_docker.pkglist | ||||
|  docker-engine | ||||
|  | ||||
| **[ubuntu16.04 ppc64el]**  | ||||
|   | ||||
| **[ubuntu16.04 ppc64el]** | ||||
|  | ||||
| At the time of this writing (February 2016), docker package is not available for **ppc64el** architecture from docker.org. You can follow instructions below on how to manually download and install it. | ||||
|  | ||||
| * Download docker engine for ppc64el:  | ||||
| * Download docker engine for ppc64el: | ||||
|  | ||||
| :: | ||||
|   | ||||
|  | ||||
|  wget http://launchpadlibrarian.net/251622081/docker.io_1.10.3-0ubuntu4_ppc64el.deb  -O /install/docker_ppc64el/docker.io_1.10.3-0ubuntu4_ppc64el.deb | ||||
|  | ||||
| * Configure **otherpkgdir** like this:  | ||||
| * Configure **otherpkgdir** like this: | ||||
|  | ||||
| :: | ||||
|  | ||||
|  otherpkgdir=/install/docker_ppc64el | ||||
|  | ||||
| * The **otherpkglist** file should be:  | ||||
| * The **otherpkglist** file should be: | ||||
|  | ||||
| :: | ||||
|  | ||||
| @@ -87,7 +87,7 @@ Create the osimage for dockerhost | ||||
| """"""""""""""""""""""""""""""""" | ||||
| The osimage for dockerhost will be like this: | ||||
|  | ||||
| **[ubuntu x86_64]** ::   | ||||
| **[ubuntu x86_64]** :: | ||||
|  | ||||
|  # lsdef -t osimage ub14.04.03-x86_64-dockerhost | ||||
|  Object name: ub14.04.03-x86_64-dockerhost | ||||
| @@ -125,9 +125,9 @@ Currently, a customer defined network object is needed when create a docker cont | ||||
|  | ||||
|  chdef host01 -p postbootscripts="setupdockerhost <netobj_name>=<subnet>/<netmask>@<gateway>[:nicname]" | ||||
|  | ||||
| * netobj_name: the network object to be created, it will be used in *dockernics* when creating docker container  | ||||
| * netobj_name: the network object to be created, it will be used in *dockernics* when creating docker container | ||||
| * subnet/netmask@gateway: the network which the IP address of docker container running on the docker host must be located in. If *nicname* is specified, the *subnet/netmask* must be the subnet of the nic *nicname* located in. And *gateway* shall be the IP address of the nic *nicname*. | ||||
| * nicname: the physical nic name which will be attached to the network object  | ||||
| * nicname: the physical nic name which will be attached to the network object | ||||
|  | ||||
| For example, a network object *mynet0* with subnet *10.0.0.0/16* and gateway *10.0.101.1* on nic *eth0* can be created with the command: :: | ||||
|  | ||||
| @@ -170,7 +170,7 @@ Create docker instance | ||||
| * dockerflag - A JSON string which will be used as parameters to create a docker. Reference `docker API v1.22 <https://docs.docker.com/engine/reference/api/docker_remote_api_v1.22/>`_ for more information about which parameters can be specified for "dockerflag". | ||||
|  | ||||
| To create the docker instance *host01c01* with image *ubuntu* and command */bin/bash*, use: :: | ||||
|   | ||||
|  | ||||
|  mkdocker host01c01 image=ubuntu command=/bin/bash dockerflag="{\"AttachStdin\":true,\"AttachStdout\":true,\"AttachStderr\":true,\"OpenStdin\":true}" | ||||
|  | ||||
| Remove docker instance | ||||
| @@ -242,13 +242,13 @@ If things go wrong: | ||||
|  | ||||
|  ip addr show dev <nicname> | ||||
|  | ||||
| * Run **ps -ef | grep docker** to verify docker engine is running with configured options. It should look something like  | ||||
| * Run **ps -ef | grep docker** to verify docker engine is running with configured options. It should look something like | ||||
|  | ||||
| :: | ||||
|  | ||||
|  root      3703     1  0 Apr15 ?        00:12:28 /usr/bin/docker daemon -H unix:///var/run/docker.sock -H tcp://host01:2375 --tls --tlscacert=/root/.docker/ca-cert.pem --tlscert=/root/.docker/dockerhost-cert.pem --tlskey=/root/.docker/dockerhost-cert.pem --tlsverify=true --raw-logs | ||||
|  | ||||
| If the output is missing some options, verify that file **/lib/systemd/system/docker.service** contains the following lines  | ||||
| If the output is missing some options, verify that file **/lib/systemd/system/docker.service** contains the following lines | ||||
|  | ||||
| :: | ||||
|  | ||||
|   | ||||
| @@ -78,7 +78,7 @@ Preparing for Using a DNS | ||||
|  | ||||
| If you are choosing any of the options for using DNS, follow these steps: | ||||
|  | ||||
| NOTE: This documentation only applies to the xCAT makedns command using the ddns.pm plugin. The ddns.pm plugin is based on named9/bind9, and can not support named8/bind8 due to syntax difference.  | ||||
| NOTE: This documentation only applies to the xCAT makedns command using the ddns.pm plugin. The ddns.pm plugin is based on named9/bind9, and cannot support named8/bind8 due to syntax differences. | ||||
|  | ||||
|     * Set the **nameservers** and **forwarders** attributes in the xCAT site table. The **nameservers** attribute identifies the DNS server hostname/ip that the nodes point to in their **/etc/resolv.conf** files. The forwarders attribute are the DNS server's ip that can resolve external hostnames. If you are running a DNS on the xCAT MN, it will use the forwarders DNS server to resolve any hostnames it can't. | ||||
|  | ||||
| @@ -88,7 +88,7 @@ For example: :: | ||||
|  | ||||
|     * Create an /etc/resolv.conf file on the management node | ||||
|  | ||||
| Edit **/etc/resolv.conf** to contain the cluster domain value you set in the site table's **domain**  attribute above, and to point to the same DNS server you will be using for your nodes (if you are using DNS).  | ||||
| Edit **/etc/resolv.conf** to contain the cluster domain value you set in the site table's **domain**  attribute above, and to point to the same DNS server you will be using for your nodes (if you are using DNS). | ||||
|  | ||||
| Option #1: Running DNS on Your Management Node | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| @@ -132,7 +132,7 @@ If you already have a DNS on your site network and you want to use it to solve t | ||||
|    * Set the site **nameservers** value to the IP address of the external name server. :: | ||||
|  | ||||
|         chdef -t site nameservers=<external dns IP> | ||||
|   | ||||
|  | ||||
|    * Set the correct information of external dns into the /etc/resolv.conf on your xCAT MN. | ||||
|  | ||||
| The **domain** and **nameservers** values must be set correctly in **/etc/resolv.conf**. They should have the same values as the ones you set in the site table. | ||||
| @@ -159,7 +159,7 @@ The **domain** and **nameservers** values must be set correctly in **/etc/resolv | ||||
|               file "db.192.168.1"; | ||||
|          }; | ||||
|  | ||||
|     * To update the name resolution entries from ``/etc/hosts`` or hosts table of xCAT MN to external DNS, run ``makedns -e``  | ||||
|     * To update the name resolution entries from ``/etc/hosts`` or hosts table of xCAT MN to external DNS, run ``makedns -e`` | ||||
|  | ||||
|       Alternatively, you can set site.externaldns=1 and run ``makedns`` | ||||
|  | ||||
| @@ -191,7 +191,7 @@ The configurations are described below for the two options, note the differences | ||||
|  | ||||
| Once **/etc/hosts** is populated with all of the nodes' hostnames and IP addresses, configure DNS on the management node and start it: :: | ||||
|  | ||||
|     makedns -n        | ||||
|     makedns -n | ||||
|  | ||||
| When the **/etc/resolv.conf** files for the compute nodes are created the value of the **nameserver**  in /etc/resolv.conf is gotten from **site.nameservers** or **networks.nameservers** if it's specified. | ||||
|  | ||||
|   | ||||
| @@ -4,4 +4,4 @@ Domain Name Resolution | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    domain_name_resolution.rst  | ||||
|    domain_name_resolution.rst | ||||
|   | ||||
| @@ -1,20 +1,30 @@ | ||||
| Quickstart | ||||
| ========== | ||||
|  | ||||
| To enable ``goconserver``, execute the following steps: | ||||
| #. For a fresh xCAT installation, run the command below to start and configure ``goconserver`` | ||||
|  | ||||
| #. Install the ``goconserver`` RPM: :: | ||||
|     makegocons | ||||
|  | ||||
|       yum install goconserver | ||||
|   The new console logs will start logging to ``/var/log/consoles/<node>.log`` | ||||
|  | ||||
| #. If you are updating xCAT and were using ``conserver`` before, follow the steps below to enable ``goconserver`` | ||||
|  | ||||
| #. If upgrading xCAT running ``conserver``, stop it first: :: | ||||
|    #. stop ``conserver`` on management node | ||||
|  | ||||
|       systemctl stop conserver.service | ||||
|        systemctl stop conserver.service | ||||
|  | ||||
|    #. For a hierarchical cluster, also stop ``conserver`` on the **service nodes**, and configure ``goconserver`` as the console server: | ||||
|  | ||||
| #. Start ``goconserver`` and create the console configuration files with a single command :: | ||||
|        xdsh service 'systemctl stop conserver.service' | ||||
|  | ||||
|       makegocons | ||||
|        chdef -t group -o service setupconserver=2 | ||||
|  | ||||
|    The new console logs will start logging to ``/var/log/consoles/<node>.log`` | ||||
|    #. start and configure ``goconserver`` | ||||
|  | ||||
|        makegocons | ||||
|  | ||||
|      The new console logs will start logging to ``/var/log/consoles/<node>.log`` | ||||
|  | ||||
| #. To check the console status of nodes, use: | ||||
|  | ||||
|        makegocons -q | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| GPU Management and Monitoring | ||||
| ============================= | ||||
|  | ||||
| The ``nvidia-smi`` command provided by NVIDIA can be used to manage and monitor GPUs enabled Compute Nodes. In conjunction with the xCAT``xdsh`` command, you can easily manage and monitor the entire set of GPU enabled Compute Nodes remotely from the Management Node.  | ||||
| The ``nvidia-smi`` command provided by NVIDIA can be used to manage and monitor GPUs enabled Compute Nodes. In conjunction with the xCAT ``xdsh`` command, you can easily manage and monitor the entire set of GPU enabled Compute Nodes remotely from the Management Node. | ||||
|  | ||||
| Example: :: | ||||
|  | ||||
| @@ -14,7 +14,7 @@ Example: :: | ||||
| Management | ||||
| ---------- | ||||
|  | ||||
| Some useful ``nvidia-smi`` example commands for management.   | ||||
| Some useful ``nvidia-smi`` example commands for management. | ||||
|  | ||||
| 	 | ||||
|     * Set persistence mode, When persistence mode is enabled the NVIDIA driver remains loaded even when no active clients, DISABLED by default:: | ||||
| @@ -29,11 +29,11 @@ Some useful ``nvidia-smi`` example commands for management. | ||||
|  | ||||
|         nvidia-smi -i 0 -p 0/1 | ||||
|  | ||||
|     * Set MODE for compute applications, query with --query-gpu=compute_mode::  | ||||
|     * Set MODE for compute applications, query with --query-gpu=compute_mode:: | ||||
|  | ||||
|         nvidia-smi -i 0 -c 0/1/2/3 | ||||
|  | ||||
|     * Trigger reset of the GPU ::  | ||||
|     * Trigger reset of the GPU :: | ||||
|  | ||||
|         nvidia-smi -i 0 -r | ||||
|  | ||||
| @@ -48,7 +48,7 @@ Some useful ``nvidia-smi`` example commands for management. | ||||
| Monitoring | ||||
| ---------- | ||||
|  | ||||
| Some useful ``nvidia-smi`` example commands for monitoring.   | ||||
| Some useful ``nvidia-smi`` example commands for monitoring. | ||||
|  | ||||
|     * The number of NVIDIA GPUs in the system :: | ||||
|  | ||||
| @@ -75,7 +75,7 @@ Some useful ``nvidia-smi`` example commands for monitoring. | ||||
|         nvidia-smi -i 0 --query-gpu=compute_mode --format=csv,noheader | ||||
|  | ||||
|     * Percent of time over the past sample period during which one or more kernels was executing on the GPU:: | ||||
|   | ||||
|  | ||||
|         nvidia-smi -i 0 --query-gpu=utilization.gpu --format=csv,noheader | ||||
|  | ||||
|     * Total errors detected across entire chip. Sum of device_memory, register_file, l1_cache, l2_cache and texture_memory :: | ||||
| @@ -86,14 +86,14 @@ Some useful ``nvidia-smi`` example commands for monitoring. | ||||
|  | ||||
|         nvidia-smi -i 0 --query-gpu=temperature.gpu --format=csv,noheader | ||||
|  | ||||
|     * The ECC mode that the GPU is currently operating under::  | ||||
|     * The ECC mode that the GPU is currently operating under:: | ||||
|  | ||||
|         nvidia-smi -i 0 --query-gpu=ecc.mode.current --format=csv,noheader | ||||
|  | ||||
|     * The power management status:: | ||||
|  | ||||
|         nvidia-smi -i 0 --query-gpu=power.management --format=csv,noheader | ||||
|   | ||||
|  | ||||
|     * The last measured power draw for the entire board, in watts:: | ||||
|  | ||||
|         nvidia-smi -i 0 --query-gpu=power.draw --format=csv,noheader | ||||
|   | ||||
| @@ -16,7 +16,7 @@ Add this script to your node object using the ``chdef`` command: :: | ||||
| Setting GPU Configurations | ||||
| -------------------------- | ||||
|  | ||||
| NVIDIA allows for changing GPU attributes using the ``nvidia-smi`` commands.  These settings do not persist when a compute node is rebooted.  One way set these attributes is to use an xCAT postscript to set the values every time the node is rebooted.   | ||||
| NVIDIA allows for changing GPU attributes using the ``nvidia-smi`` commands.  These settings do not persist when a compute node is rebooted.  One way to set these attributes is to use an xCAT postscript to set the values every time the node is rebooted. | ||||
|  | ||||
|  | ||||
| * Set the power limit to 175W: :: | ||||
|   | ||||
| @@ -12,7 +12,7 @@ xCAT provides a sample package list files for CUDA. You can find them at: | ||||
|     * ``/opt/xcat/share/xcat/install/ubuntu/cudafull.ubuntu14.04.3.ppc64el.pkglist`` | ||||
|     * ``/opt/xcat/share/xcat/install/ubuntu/cudaruntime.ubuntu14.04.3.ppc64el.pkglist`` | ||||
|  | ||||
| **[diskful note]**: There is a requirement to reboot the machine after the CUDA drivers are installed.  To satisfy this requirement, the CUDA software is installed in the ``pkglist`` attribute of the osimage definition where the reboot happens after the Operating System is installed.  | ||||
| **[diskful note]**: There is a requirement to reboot the machine after the CUDA drivers are installed.  To satisfy this requirement, the CUDA software is installed in the ``pkglist`` attribute of the osimage definition where the reboot happens after the Operating System is installed. | ||||
|  | ||||
| cudafull | ||||
| ^^^^^^^^ | ||||
| @@ -21,13 +21,13 @@ cudafull | ||||
|  | ||||
|     lsdef -t osimage -z ubuntu14.04.3-ppc64el-install-compute \ | ||||
|       | sed 's/install-compute:/install-cudafull:/' \ | ||||
|       | mkdef -z  | ||||
|       | mkdef -z | ||||
|  | ||||
| #. Add the CUDA repo created in the previous step to the ``pkgdir`` attribute. | ||||
|  | ||||
|    If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the pkgdir:: | ||||
|  | ||||
|     chdef -t osimage -o ubuntu14.04.3-ppc64el-install-cudafull \  | ||||
|     chdef -t osimage -o ubuntu14.04.3-ppc64el-install-cudafull \ | ||||
|      -p pkgdir=http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local | ||||
|  | ||||
|  | ||||
| @@ -45,7 +45,7 @@ cudaruntime | ||||
|  | ||||
|     lsdef -t osimage -z ubuntu14.04.3-ppc64el-install-compute \ | ||||
|       | sed 's/install-compute:/install-cudaruntime:/' \ | ||||
|       | mkdef -z  | ||||
|       | mkdef -z | ||||
|  | ||||
| #. Add the CUDA repo created in the previous step to the ``pkgdir`` attribute: | ||||
|  | ||||
| @@ -64,14 +64,14 @@ cudaruntime | ||||
| Diskless images | ||||
| --------------- | ||||
|  | ||||
| The following examples will create diskless images for ``cudafull`` and ``cudaruntime``.  The osimage definitions will be created from the base ``ubuntu14.04.3-ppc64el-netboot-compute`` osimage.  | ||||
| The following examples will create diskless images for ``cudafull`` and ``cudaruntime``.  The osimage definitions will be created from the base ``ubuntu14.04.3-ppc64el-netboot-compute`` osimage. | ||||
|  | ||||
| xCAT provides sample package list files for CUDA. You can find them at: | ||||
|  | ||||
|     * ``/opt/xcat/share/xcat/netboot/ubuntu/cudafull.ubuntu14.04.3.ppc64el.pkglist`` | ||||
|     * ``/opt/xcat/share/xcat/netboot/ubuntu/cudaruntime.ubuntu14.04.3.ppc64el.pkglist`` | ||||
|  | ||||
| **[diskless note]**: For diskless images, the requirement for rebooting the machine is not applicable because the image is loaded on each reboot.  The install of the CUDA packages is required to be done in the ``otherpkglist`` **NOT** the ``pkglist``.  | ||||
| **[diskless note]**: For diskless images, the requirement for rebooting the machine is not applicable because the image is loaded on each reboot.  The install of the CUDA packages is required to be done in the ``otherpkglist`` **NOT** the ``pkglist``. | ||||
|  | ||||
| cudafull | ||||
| ^^^^^^^^ | ||||
| @@ -80,9 +80,9 @@ cudafull | ||||
|  | ||||
|     lsdef -t osimage -z ubuntu14.04.3-ppc64el-netboot-compute \ | ||||
|       | sed 's/netboot-compute:/netboot-cudafull:/' \ | ||||
|       | mkdef -z  | ||||
|       | mkdef -z | ||||
|  | ||||
| #. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute.  | ||||
| #. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute. | ||||
|  | ||||
|    If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the ``otherpkgdir``:: | ||||
|  | ||||
| @@ -98,7 +98,7 @@ cudafull | ||||
|  | ||||
| #. Verify that ``acpid`` is installed on the Management Node or on the Ubuntu host where you are generating the diskless image: :: | ||||
|  | ||||
|     apt-get install -y acpid  | ||||
|     apt-get install -y acpid | ||||
|  | ||||
| #. Generate the image: :: | ||||
|  | ||||
| @@ -115,9 +115,9 @@ cudaruntime | ||||
|  | ||||
|     lsdef -t osimage -z ubuntu14.04.3-ppc64el-netboot-compute \ | ||||
|       | sed 's/netboot-compute:/netboot-cudaruntime:/' \ | ||||
|       | mkdef -z  | ||||
|       | mkdef -z | ||||
|  | ||||
| #. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute.  | ||||
| #. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute. | ||||
|  | ||||
|    If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the ``otherpkgdir``:: | ||||
|  | ||||
| @@ -133,7 +133,7 @@ cudaruntime | ||||
|  | ||||
| #. Verify that ``acpid`` is installed on the Management Node or on the Ubuntu host where you are generating the diskless image: :: | ||||
|  | ||||
|     apt-get install -y acpid  | ||||
|     apt-get install -y acpid | ||||
|  | ||||
| #. Generate the image: :: | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| Create CUDA software repository | ||||
| =============================== | ||||
|  | ||||
| The NVIDIA CUDA Toolkit is available to download at http://developer.nvidia.com/cuda-downloads.   | ||||
| The NVIDIA CUDA Toolkit is available to download at http://developer.nvidia.com/cuda-downloads. | ||||
|  | ||||
| Download the toolkit and prepare the software repository on the xCAT Management Node to serve the NVIDIA CUDA files. | ||||
|  | ||||
|   | ||||
| @@ -3,9 +3,9 @@ Install NVIDIA Management Library (optional) | ||||
|  | ||||
| See https://developer.nvidia.com/nvidia-management-library-nvml for more information. | ||||
|  | ||||
| The .run file can be downloaded from NVIDIA's website and placed into the ``/install/postscripts`` directory on the Management Node.  | ||||
| The .run file can be downloaded from NVIDIA's website and placed into the ``/install/postscripts`` directory on the Management Node. | ||||
|  | ||||
| To enable installation of the management library after the node is installed, add the .run file to the ``postbootscripts`` attribute for the nodes: ::  | ||||
| To enable installation of the management library after the node is installed, add the .run file to the ``postbootscripts`` attribute for the nodes: :: | ||||
|  | ||||
|    # ensure the .run file has execute permission | ||||
|    chmod +x /install/postscripts/<gpu_deployment_kit>.run | ||||
|   | ||||
| @@ -9,14 +9,14 @@ Local | ||||
| A local package repo will contain all of the CUDA packages.  Extract the CUDA packages into ``/install/cuda-repo/ppc64el``: :: | ||||
|  | ||||
|     # For CUDA toolkit: /root/cuda-repo-ubuntu1404-7-5-local_7.5-18_ppc64el.deb | ||||
|      | ||||
|  | ||||
|     # Create the repo directory under xCAT /install dir | ||||
|     mkdir -p /install/cuda-repo/ppc64el | ||||
|  | ||||
|     # extract the package | ||||
|     dpkg -x /root/cuda-repo-ubuntu1404-7-5-local_7.5-18_ppc64el.deb /install/cuda-repo/ppc64el | ||||
|  | ||||
|      | ||||
|  | ||||
|  | ||||
| Network | ||||
| ------- | ||||
| @@ -31,7 +31,7 @@ The ``sources.list`` entry may look similar to: :: | ||||
| Authorize the CUDA repo | ||||
| ----------------------- | ||||
|  | ||||
| In order to access the CUDA repository you must import the CUDA GPGKEY into the ``apt_key`` trust list.  xCAT provides a sample postscript ``/install/postscripts/addcudakey`` to help with this task: ::  | ||||
| In order to access the CUDA repository you must import the CUDA GPGKEY into the ``apt_key`` trust list.  xCAT provides a sample postscript ``/install/postscripts/addcudakey`` to help with this task: :: | ||||
|  | ||||
|    chdef -t node -o <noderange> -p postscripts=addcudakey | ||||
|  | ||||
|   | ||||
| @@ -4,10 +4,10 @@ Verify CUDA Installation | ||||
| **The following verification steps only apply to the ``cudafull`` installations.** | ||||
|  | ||||
| #. Verify driver version by looking at: ``/proc/driver/nvidia/version``: :: | ||||
|    | ||||
|  | ||||
|     # cat /proc/driver/nvidia/version | ||||
|      NVRM version: NVIDIA UNIX ppc64le Kernel Module  352.39  Fri Aug 14 17:10:41 PDT 2015 | ||||
|      GCC version:  gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC)  | ||||
|      GCC version:  gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC) | ||||
|  | ||||
| #. Verify the CUDA Toolkit version :: | ||||
|  | ||||
| @@ -19,7 +19,7 @@ Verify CUDA Installation | ||||
|  | ||||
| #. Verify running CUDA GPU jobs by compiling the samples and executing the ``deviceQuery`` or ``bandwidthTest`` programs. | ||||
|  | ||||
|    * Compile the samples:  | ||||
|    * Compile the samples: | ||||
|  | ||||
|      **[RHEL]:** :: | ||||
|  | ||||
| @@ -32,13 +32,13 @@ Verify CUDA Installation | ||||
|  | ||||
|         cd ~/ | ||||
|         apt-get install cuda-samples-7-0 -y | ||||
|         cd /usr/local/cuda-7.0/samples  | ||||
|         make  | ||||
|         cd /usr/local/cuda-7.0/samples | ||||
|         make | ||||
|  | ||||
|  | ||||
|    * Run the ``deviceQuery`` sample: :: | ||||
|  | ||||
|         # ./bin/ppc64le/linux/release/deviceQuery    | ||||
|         # ./bin/ppc64le/linux/release/deviceQuery | ||||
|           ./deviceQuery Starting... | ||||
|           CUDA Device Query (Runtime API) version (CUDART static linking) | ||||
|           Detected 4 CUDA Capable device(s) | ||||
| @@ -54,9 +54,9 @@ Verify CUDA Installation | ||||
|             ............ | ||||
|             deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 7.5, CUDA Runtime Version = 7.5, NumDevs = 4, Device0 = Tesla K80, Device1 = Tesla K80, Device2 = Tesla K80, Device3 = Tesla K80 | ||||
|             Result = PASS | ||||
|     | ||||
|  | ||||
|    * Run the ``bandwidthTest`` sample: :: | ||||
|   | ||||
|  | ||||
|         # ./bin/ppc64le/linux/release/bandwidthTest | ||||
|           [CUDA Bandwidth Test] - Starting... | ||||
|           Running on... | ||||
| @@ -75,6 +75,6 @@ Verify CUDA Installation | ||||
|             Transfer Size (Bytes)        Bandwidth(MB/s) | ||||
|             33554432                     141485.3 | ||||
|           Result = PASS | ||||
|      | ||||
|  | ||||
|     NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled. | ||||
|      | ||||
|  | ||||
|   | ||||
| @@ -1,42 +1,42 @@ | ||||
| Configuration considerations | ||||
| ============================ | ||||
|  | ||||
| xCAT provides several configuration options for the HAMN. You can select one of the options based on your failover requirements and hardware configuration; the following configuration considerations should help you make the decision.  | ||||
| xCAT provides several configuration options for the HAMN. You can select one of the options based on your failover requirements and hardware configuration; the following configuration considerations should help you make the decision. | ||||
|  | ||||
| Data synchronization mechanism | ||||
| ------------------------------ | ||||
|  | ||||
| The data synchronization is important for any high availability configuration. When the xCAT management node failover occurs, the xCAT data needs to be exactly the same before failover, and some of the operating system configuration should also be synchronized between the two management nodes. To be specific, the following data should be synchronized between the two management nodes to make the xCAT HAMN work:  | ||||
| The data synchronization is important for any high availability configuration. When the xCAT management node failover occurs, the xCAT data needs to be exactly the same before failover, and some of the operating system configuration should also be synchronized between the two management nodes. To be specific, the following data should be synchronized between the two management nodes to make the xCAT HAMN work: | ||||
|  | ||||
| * xCAT database  | ||||
| * xCAT configuration files, like ``/etc/xcat``, ``~/.xcat``, ``/opt/xcat``  | ||||
| * The configuration files for the services that are required by xCAT, like named, DHCP, apache, nfs, ssh, etc.  | ||||
| * The operating systems images repository and users customization data repository, the ``/install`` directory contains these repositories in most cases.  | ||||
| * xCAT database | ||||
| * xCAT configuration files, like ``/etc/xcat``, ``~/.xcat``, ``/opt/xcat`` | ||||
| * The configuration files for the services that are required by xCAT, like named, DHCP, apache, nfs, ssh, etc. | ||||
| * The operating systems images repository and users customization data repository, the ``/install`` directory contains these repositories in most cases. | ||||
|  | ||||
| There are a lot of ways for data synchronization, but considering the specific xCAT HAMN requirements, only several of the data synchronization options are practical for xCAT HAMN.  | ||||
| There are a lot of ways for data synchronization, but considering the specific xCAT HAMN requirements, only several of the data synchronization options are practical for xCAT HAMN. | ||||
|  | ||||
| **1\. Move physical disks between the two management nodes**: if we could physically move the hard disks from the failed management node to the backup management node, and bring up the backup management node, then both the operating system and xCAT data will be identical between the new management node and the failed management node. RAID1 or disk mirroring could be used to avoid the disk being a single point of failure.  | ||||
| **1\. Move physical disks between the two management nodes**: if we could physically move the hard disks from the failed management node to the backup management node, and bring up the backup management node, then both the operating system and xCAT data will be identical between the new management node and the failed management node. RAID1 or disk mirroring could be used to avoid the disk being a single point of failure. | ||||
|  | ||||
| **2\. Shared data**: the two management nodes use the single copy of xCAT data, no matter which management node is the primary MN, the cluster management capability is running on top of the single data copy. The access to the data could be done through various ways like shared storage, NAS, NFS, samba etc. Based on the protocol being used, the data might be accessible only on one management node at a time or be accessible on both management nodes in parallel. If the data could only be accessed from one management node, the failover process need to take care of the data access transition; if the data could be accessed on both management nodes, the failover does not need to consider the data access transition, it usually means the failover process could be faster.  | ||||
| **2\. Shared data**: the two management nodes use the single copy of xCAT data, no matter which management node is the primary MN, the cluster management capability is running on top of the single data copy. The access to the data could be done through various ways like shared storage, NAS, NFS, samba etc. Based on the protocol being used, the data might be accessible only on one management node at a time or be accessible on both management nodes in parallel. If the data could only be accessed from one management node, the failover process need to take care of the data access transition; if the data could be accessed on both management nodes, the failover does not need to consider the data access transition, it usually means the failover process could be faster. | ||||
|  | ||||
| Warning: Running database through network file system has a lot of potential problems and is not practical, however, most of the database system provides database replication feature that can be used to synchronize the database between the two management nodes.  | ||||
| Warning: Running database through network file system has a lot of potential problems and is not practical, however, most of the database system provides database replication feature that can be used to synchronize the database between the two management nodes. | ||||
|  | ||||
| **3\. Mirroring**: each of the management node has its own copy of the xCAT data, and the two copies of data are synchronized through mirroring mechanism. DRBD is used widely in the high availability configuration scenarios, to provide data replication by mirroring a whole block device via network. If we put all the important data for xCAT onto the DRBD devices, then it could assure the data is synchronized between the two management nodes. Some parallel file system also provides capability to mirror data through network.  | ||||
| **3\. Mirroring**: each of the management node has its own copy of the xCAT data, and the two copies of data are synchronized through mirroring mechanism. DRBD is used widely in the high availability configuration scenarios, to provide data replication by mirroring a whole block device via network. If we put all the important data for xCAT onto the DRBD devices, then it could assure the data is synchronized between the two management nodes. Some parallel file system also provides capability to mirror data through network. | ||||
|  | ||||
| Manual vs. Automatic Failover | ||||
| ----------------------------- | ||||
|  | ||||
| When the primary management node fails, the backup management node could automatically take over, or the administrator has to perform some manual procedure to finish the failover. In general, the automatic failover takes less time to detect the failure and perform the failover, compared with the manual failover, but the automatic failover requires more complex configuration. We could not say the automatic failover is better than the manual failover in all cases; the following factors should be considered when deciding between manual failover and automatic failover:  | ||||
| When the primary management node fails, the backup management node could automatically take over, or the administrator has to perform some manual procedure to finish the failover. In general, the automatic failover takes less time to detect the failure and perform the failover, compared with the manual failover, but the automatic failover requires more complex configuration. We could not say the automatic failover is better than the manual failover in all cases; the following factors should be considered when deciding between manual failover and automatic failover: | ||||
|  | ||||
| **1\. How long the cluster could survive if the management node is down?** | ||||
|  | ||||
| If the cluster could not survive for more than several minutes, then the automatic failover might be the only option; if the compute nodes could run without the management node, at least for a while, then the manual failover could be an option.  | ||||
| If the cluster could not survive for more than several minutes, then the automatic failover might be the only option; if the compute nodes could run without the management node, at least for a while, then the manual failover could be an option. | ||||
|  | ||||
| From xCAT perspective, if the management node needs to provide network services like DHCP, named, ntp or nfs to the compute nodes, then the cluster probably could not survive too long if the management node is down; if the management node only performs hardware control and some other management capabilities, then the failed management node may not cause too much trouble for the cluster. xCAT provides various options for configuring if the compute nodes rely on the network services on the management node.  | ||||
| From xCAT perspective, if the management node needs to provide network services like DHCP, named, ntp or nfs to the compute nodes, then the cluster probably could not survive too long if the management node is down; if the management node only performs hardware control and some other management capabilities, then the failed management node may not cause too much trouble for the cluster. xCAT provides various options for configuring if the compute nodes rely on the network services on the management node. | ||||
|  | ||||
| **2\. Configuration complexity** | ||||
|  | ||||
| The configuration for the high availability applications is usually complex, it may take a long time to configure, debug and stabilize the high availability configuration.  | ||||
| The configuration for the high availability applications is usually complex, it may take a long time to configure, debug and stabilize the high availability configuration. | ||||
|  | ||||
| **3\. Maintenance effort** | ||||
|  | ||||
| @@ -45,7 +45,7 @@ The automatic failover brings in several high availability applications, after t | ||||
| Configuration Options | ||||
| ===================== | ||||
|  | ||||
| The combinations of data synchronization mechanism and manual/automatic failover indicate different HAMN configuration options; the table below lists all the combinations (the bold numbers are the combinations xCAT has documented and tested):  | ||||
| The combinations of data synchronization mechanism and manual/automatic failover indicate different HAMN configuration options; the table below lists all the combinations (the bold numbers are the combinations xCAT has documented and tested): | ||||
|  | ||||
| +-------------------+-------------------------+-----------------+--------------+ | ||||
| |#                  | **Move physical disks** | **Shared data** | **Mirroring**| | ||||
| @@ -55,14 +55,14 @@ The combinations of data synchronization mechanism and manual/automatic failover | ||||
| |Automatic Failover | 4                       | **5**           | **6**        | | ||||
| +-------------------+-------------------------+-----------------+--------------+ | ||||
|  | ||||
| Option 1, :ref:`setup_ha_mgmt_node_with_raid1_and_disks_move`  | ||||
| Option 1, :ref:`setup_ha_mgmt_node_with_raid1_and_disks_move` | ||||
|  | ||||
| Option 2, :ref:`setup_ha_mgmt_node_with_shared_data`  | ||||
| Option 2, :ref:`setup_ha_mgmt_node_with_shared_data` | ||||
|  | ||||
| Option 3, it is doable but not currently supported. | ||||
|  | ||||
| Option 4, it is not practical. | ||||
|  | ||||
| Option 5, :ref:`setup_xcat_high_available_management_node_with_nfs`  | ||||
| Option 5, :ref:`setup_xcat_high_available_management_node_with_nfs` | ||||
|  | ||||
| Option 6, :ref:`setup_ha_mgmt_node_with_drbd_pacemaker_corosync` | ||||
|   | ||||
| @@ -6,7 +6,7 @@ The xCAT management node plays an important role in the cluster, if the manageme | ||||
| The goal of the HAMN (High Availability Management Node) configuration is, when the primary xCAT management node fails, the standby management node can take over the role of the management node, either through automatic failover or through manual procedure performed by the administrator, and thus avoid long periods of time during which your cluster does not have active cluster management function available. | ||||
|  | ||||
|  | ||||
| The following pages describe ways to configure the xCAT Management Node for High Availability.  | ||||
| The following pages describe ways to configure the xCAT Management Node for High Availability. | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|   | ||||
| @@ -1651,7 +1651,7 @@ which should result in the following output: :: | ||||
|       x3550m4n01 x3550m4n02 | ||||
|      Pacemaker Nodes: | ||||
|       x3550m4n01 x3550m4n02 | ||||
|       | ||||
|  | ||||
|      Resources: | ||||
|       Resource: ip_xCAT (class=ocf provider=heartbeat type=IPaddr2) | ||||
|        Attributes: ip=10.1.0.1 iflabel=xCAT cidr_netmask=22 nic=eno2 | ||||
| @@ -1714,10 +1714,10 @@ which should result in the following output: :: | ||||
|         Operations: start interval=0s timeout=1m (symlinks_xCAT-start-timeout-1m) | ||||
|                     stop interval=0s timeout=1m (symlinks_xCAT-stop-timeout-1m) | ||||
|                     monitor interval=31s on-fail=ignore (symlinks_xCAT-monitor-interval-31s) | ||||
|       | ||||
|  | ||||
|      Stonith Devices: | ||||
|      Fencing Levels: | ||||
|       | ||||
|  | ||||
|      Location Constraints: | ||||
|      Ordering Constraints: | ||||
|        promote ms_drbd_xCAT then start grp_xCAT (kind:Mandatory) (id:order-ms_drbd_xCAT-grp_xCAT-mandatory) | ||||
| @@ -1756,7 +1756,7 @@ which should result in the following output: :: | ||||
|        gmetad with grp_xCAT (score:INFINITY) (id:colocation-gmetad-grp_xCAT-INFINITY) | ||||
|        icinga with grp_xCAT (score:INFINITY) (id:colocation-icinga-grp_xCAT-INFINITY) | ||||
|        ip_xCAT with grp_xCAT (score:INFINITY) (id:colocation-ip_xCAT-grp_xCAT-INFINITY) | ||||
|       | ||||
|  | ||||
|      Cluster Properties: | ||||
|       cluster-infrastructure: corosync | ||||
|       cluster-name: ucl_cluster | ||||
| @@ -1780,12 +1780,12 @@ And the resulting output should be the following: :: | ||||
|      Version: 1.1.12-a14efad | ||||
|      2 Nodes configured | ||||
|      17 Resources configured | ||||
|       | ||||
|       | ||||
|  | ||||
|  | ||||
|      Online: [ x3550m4n01 x3550m4n02 ] | ||||
|       | ||||
|  | ||||
|      Full list of resources: | ||||
|       | ||||
|  | ||||
|       ip_xCAT        (ocf::heartbeat:IPaddr2):       Started x3550m4n01 | ||||
|       NFS_xCAT       (ocf::heartbeat:nfsserver):     Started x3550m4n01 | ||||
|       apache_xCAT    (ocf::heartbeat:apache):        Started x3550m4n01 | ||||
| @@ -1802,7 +1802,7 @@ And the resulting output should be the following: :: | ||||
|       Resource Group: grp_xCAT | ||||
|           fs_xCAT    (ocf::heartbeat:Filesystem):    Started x3550m4n01 | ||||
|           symlinks_xCAT      (ocf::tummy:drbdlinks): Started x3550m4n01 | ||||
|       | ||||
|  | ||||
|      PCSD Status: | ||||
|        x3550m4n01: Online | ||||
|        x3550m4n02: Online | ||||
| @@ -1818,7 +1818,7 @@ Further from this, the following changes needed to be made for nfs in el7 :: | ||||
|      Description=RPC Pipe File System | ||||
|      DefaultDependencies=no | ||||
|      Conflicts=umount.target | ||||
|       | ||||
|  | ||||
|      [Mount] | ||||
|      What=sunrpc | ||||
|      Where=/var/lib/nfs_local/rpc_pipefs | ||||
| @@ -1849,9 +1849,9 @@ Further from this, the following changes needed to be made for nfs in el7 :: | ||||
|      -After=var-lib-nfs-rpc_pipefs.mount | ||||
|      +Requires=var-lib-nfs_local-rpc_pipefs.mount | ||||
|      +After=var-lib-nfs_local-rpc_pipefs.mount | ||||
|       | ||||
|  | ||||
|       ConditionPathExists=/etc/krb5.keytab | ||||
|       | ||||
|  | ||||
|  | ||||
|      --- /usr/lib/systemd/system/nfs-secure.service	2015-01-23 16:30:26.000000000 +0000 | ||||
|      +++ /etc/systemd/system/nfs-secure.service	2015-10-13 01:39:36.000000000 +0100 | ||||
| @@ -1863,9 +1863,9 @@ Further from this, the following changes needed to be made for nfs in el7 :: | ||||
|      -After=var-lib-nfs-rpc_pipefs.mount | ||||
|      +Requires=var-lib-nfs_local-rpc_pipefs.mount | ||||
|      +After=var-lib-nfs_local-rpc_pipefs.mount | ||||
|       | ||||
|  | ||||
|       ConditionPathExists=/etc/krb5.keytab | ||||
|       | ||||
|  | ||||
|  | ||||
|      --- /usr/lib/systemd/system/nfs-secure-server.service	2015-01-23 16:30:26.000000000 +0000 | ||||
|      +++ /etc/systemd/system/nfs-secure-server.service	2015-10-13 01:39:36.000000000 +0100 | ||||
| @@ -1878,7 +1878,7 @@ Further from this, the following changes needed to be made for nfs in el7 :: | ||||
|      +After=var-lib-nfs_local-rpc_pipefs.mount | ||||
|       PartOf=nfs-server.service | ||||
|       PartOf=nfs-utils.service | ||||
|       | ||||
|  | ||||
|  | ||||
|      --- /usr/lib/systemd/system/nfs-blkmap.service	2015-01-23 16:30:26.000000000 +0000 | ||||
|      +++ /etc/systemd/system/nfs-blkmap.service	2015-10-13 01:39:36.000000000 +0100 | ||||
| @@ -1890,7 +1890,7 @@ Further from this, the following changes needed to be made for nfs in el7 :: | ||||
|      -Requires=var-lib-nfs-rpc_pipefs.mount | ||||
|      +After=var-lib-nfs_local-rpc_pipefs.mount | ||||
|      +Requires=var-lib-nfs_local-rpc_pipefs.mount | ||||
|       | ||||
|  | ||||
|       Requisite=nfs-blkmap.target | ||||
|       After=nfs-blkmap.target | ||||
|  | ||||
|   | ||||
| @@ -81,4 +81,4 @@ The failover procedure is simple and straightforward: | ||||
|  | ||||
| #. Boot up the standby management node | ||||
|  | ||||
| #. Verify the standby management node could now perform all the cluster management operations.  | ||||
| #. Verify the standby management node could now perform all the cluster management operations. | ||||
|   | ||||
| @@ -54,7 +54,7 @@ The configuration procedure will be quite different based on the shared data mec | ||||
|     /install | ||||
|     ~/.xcat | ||||
|     /<dbdirectory> | ||||
|     /tftpboot  | ||||
|     /tftpboot | ||||
|  | ||||
|  | ||||
| .. note:: * For MySQL, the database directory is ``/var/lib/mysql`` | ||||
| @@ -100,7 +100,7 @@ Setup xCAT on the Primary Management Node | ||||
|  | ||||
|    Add the two management nodes into policy table: :: | ||||
|  | ||||
|     tabdump policy   | ||||
|     tabdump policy | ||||
|     "1.2","rhmn1",,,,,,"trusted",, | ||||
|     "1.3","rhmn2",,,,,,"trusted",, | ||||
|     "1.4","rhmn",,,,,,"trusted",, | ||||
| @@ -109,7 +109,7 @@ Setup xCAT on the Primary Management Node | ||||
|  | ||||
|     chdef -t site databaseloc=/dbdirectory | ||||
|  | ||||
| #. Install and configure database. Refer to the doc [**doto:** choosing_the_Database] to configure the database on the xCAT management node. For PostgreSql, add primary and standby IP addresses access to database, use ``pgsqlsetup -i -a 9.114.47.103 -a 9.114.47.104`` to migrate an existing xCAT database from SQLite to PostgreSQL.   | ||||
| #. Install and configure database. Refer to the doc [**doto:** choosing_the_Database] to configure the database on the xCAT management node. For PostgreSql, add primary and standby IP addresses access to database, use ``pgsqlsetup -i -a 9.114.47.103 -a 9.114.47.104`` to migrate an existing xCAT database from SQLite to PostgreSQL. | ||||
|  | ||||
|    Verify xcat is running on correct database by running: :: | ||||
|  | ||||
| @@ -135,7 +135,7 @@ Setup xCAT on the Primary Management Node | ||||
| #. Stop the xcatd daemon and some related network services from starting on reboot: :: | ||||
|  | ||||
|     service xcatd stop | ||||
|     chkconfig --level 345 xcatd off   | ||||
|     chkconfig --level 345 xcatd off | ||||
|     service conserver stop | ||||
|     chkconfig --level 2345 conserver off | ||||
|     service dhcpd stop | ||||
| @@ -148,8 +148,8 @@ Setup xCAT on the Primary Management Node | ||||
|  | ||||
| #. (Optional) If DFM is being used for hardware control capabilities, install DFM package, setup xCAT to communicate directly to the System P server's service processor.:: | ||||
|  | ||||
|     xCAT-dfm RPM  | ||||
|     ISNM-hdwr_svr RPM   | ||||
|     xCAT-dfm RPM | ||||
|     ISNM-hdwr_svr RPM | ||||
|  | ||||
| #. If there is any node that is already managed by the Management Node, change the noderes table tftpserver & xcatmaster & nfsserver attributes to the Virtual IP | ||||
|  | ||||
| @@ -160,7 +160,7 @@ Setup xCAT on the Primary Management Node | ||||
| Setup xCAT on the Standby Management Node | ||||
| ========================================= | ||||
|  | ||||
| #. Make sure the standby management node is NOT using the shared data.  | ||||
| #. Make sure the standby management node is NOT using the shared data. | ||||
|  | ||||
| #. Add the alias ip address ``9.114.47.97`` into the ``/etc/resolv.conf`` as the nameserver. Change the hostname resolution order to be using ``/etc/hosts`` before using name server. Change "hosts: files dns" in /etc/nsswitch.conf. | ||||
|  | ||||
| @@ -174,8 +174,8 @@ Setup xCAT on the Standby Management Node | ||||
|  | ||||
| #. (Optional) DFM only, Install DFM package: :: | ||||
|  | ||||
|     xCAT-dfm RPM  | ||||
|     ISNM-hdwr_svr RPM  | ||||
|     xCAT-dfm RPM | ||||
|     ISNM-hdwr_svr RPM | ||||
|  | ||||
| #. Setup hostname resolution between the primary management node and standby management node. Make sure the primary management node can resolve the hostname of the standby management node, and vice versa. | ||||
|  | ||||
| @@ -190,7 +190,7 @@ Setup xCAT on the Standby Management Node | ||||
| #. Stop the xcatd daemon and related network services from starting on reboot: :: | ||||
|  | ||||
|     service xcatd stop | ||||
|     chkconfig --level 345 xcatd off   | ||||
|     chkconfig --level 345 xcatd off | ||||
|     service conserver stop | ||||
|     chkconfig --level 2345 conserver off | ||||
|     service dhcpd stop | ||||
| @@ -458,7 +458,7 @@ The operating system is installed on the internal disks. | ||||
|  | ||||
| #. Connect the shared disk to both management nodes | ||||
|  | ||||
|    To verify the shared disks are connected correctly, run the sginfo command on both management nodes and look for the same serial number in the output. Be aware that the sginfo command may not be installed by default on Linux, the sginfo command is shipped with package sg3_utils, you can manually install the package sg3_utils on both management nodes.  | ||||
|    To verify the shared disks are connected correctly, run the sginfo command on both management nodes and look for the same serial number in the output. Be aware that the sginfo command may not be installed by default on Linux, the sginfo command is shipped with package sg3_utils, you can manually install the package sg3_utils on both management nodes. | ||||
|  | ||||
|    Once the sginfo command is installed, run sginfo -l command on both management nodes to list all the known SCSI disks, for example, enter: :: | ||||
|  | ||||
| @@ -473,9 +473,9 @@ The operating system is installed on the internal disks. | ||||
|     /dev/sg3 [=/dev/sdd  scsi0 ch=0 id=4 lun=0] | ||||
|     /dev/sg4 [=/dev/sde  scsi0 ch=0 id=5 lun=0] | ||||
|  | ||||
|    Use the ``sginfo -s <device_name>`` to identify disks with the same serial number on both management nodes, for example:  | ||||
|    Use the ``sginfo -s <device_name>`` to identify disks with the same serial number on both management nodes, for example: | ||||
|  | ||||
|    On the primary management node: ::  | ||||
|    On the primary management node: :: | ||||
|  | ||||
|     [root@rhmn1 ~]# sginfo -s /dev/sdb | ||||
|     Serial Number '1T23043224      ' | ||||
| @@ -487,7 +487,7 @@ The operating system is installed on the internal disks. | ||||
|     [root@rhmn2~]# sginfo -s /dev/sdb | ||||
|     Serial Number '1T23043224      ' | ||||
|  | ||||
|    We can see that the ``/dev/sdb`` is a shared disk on both management nodes. In some cases, as with mirrored disks and when there is no matching of serial numbers between the two management nodes, multiple disks on a single server can have the same serial number. In these cases, format the disks, mount them on both management nodes, and then touch files on the disks to determine if they are shared between the management nodes.  | ||||
|    We can see that the ``/dev/sdb`` is a shared disk on both management nodes. In some cases, as with mirrored disks and when there is no matching of serial numbers between the two management nodes, multiple disks on a single server can have the same serial number. In these cases, format the disks, mount them on both management nodes, and then touch files on the disks to determine if they are shared between the management nodes. | ||||
|  | ||||
| #. Create partitions on shared disks | ||||
|  | ||||
| @@ -495,7 +495,7 @@ The operating system is installed on the internal disks. | ||||
|  | ||||
|     fdisk /dev/sdc | ||||
|  | ||||
|    Verify the partitions are created by running ``fdisk -l``.  | ||||
|    Verify the partitions are created by running ``fdisk -l``. | ||||
|  | ||||
| #. Create file systems on shared disks | ||||
|  | ||||
| @@ -507,9 +507,9 @@ The operating system is installed on the internal disks. | ||||
|     mkfs.ext3 -v /dev/sdc4 | ||||
|     mkfs.ext3 -v /dev/sdc5 | ||||
|  | ||||
|    If you place entries for the disk in ``/etc/fstab``, which is not required, ensure that the entries do not have the system automatically mount the disk.  | ||||
|    If you place entries for the disk in ``/etc/fstab``, which is not required, ensure that the entries do not have the system automatically mount the disk. | ||||
|  | ||||
|    .. note::  Since the file systems will not be mounted automatically during system reboot this must be manually done and xCAT should be started **after** the filesystem is mounted.  | ||||
|    .. note::  Since the file systems will not be mounted automatically during system reboot this must be manually done and xCAT should be started **after** the filesystem is mounted. | ||||
|  | ||||
| #. Verify the file systems on the primary management node. | ||||
|  | ||||
| @@ -525,7 +525,7 @@ The operating system is installed on the internal disks. | ||||
|  | ||||
|      umount /etc/xcat | ||||
|      umount /install | ||||
|      umount ~/.xcat  | ||||
|      umount ~/.xcat | ||||
|      umount /<dbdirectory> | ||||
|      umount /tftpboot | ||||
|  | ||||
| @@ -539,9 +539,9 @@ The operating system is installed on the internal disks. | ||||
|      mount /dev/sdc4 /<dbdirectory> | ||||
|      mount /dev/sdc5 /tftpboot | ||||
|  | ||||
|    You may get errors "mount: you must specify the filesystem type" or "mount: special device /dev/sdb1 does not exist" when trying to mount the file systems on the standby management node, this is caused by the missing device files on the standby management node, run ``fdisk /dev/sdx`` and simply select "w write table to disk and exit" in the fdisk menu, then retry the mount.  | ||||
|    You may get errors "mount: you must specify the filesystem type" or "mount: special device /dev/sdb1 does not exist" when trying to mount the file systems on the standby management node, this is caused by the missing device files on the standby management node, run ``fdisk /dev/sdx`` and simply select "w write table to disk and exit" in the fdisk menu, then retry the mount. | ||||
|  | ||||
|    After that, umount the file system on the standby management node: ::  | ||||
|    After that, umount the file system on the standby management node: :: | ||||
|  | ||||
|     umount /etc/xcat | ||||
|     umount /install | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| .. _setup_xcat_high_available_management_node_with_nfs: | ||||
|  | ||||
| Setup xCAT HA Mgmt with NFS pacemaker and corosync  | ||||
| Setup xCAT HA Mgmt with NFS pacemaker and corosync | ||||
| ==================================================================================== | ||||
|  | ||||
| In this doc, we will configure an xCAT HA cluster using ``pacemaker`` and ``corosync`` based on NFS server. ``pacemaker`` and ``corosync`` only support ``x86_64`` systems. For more information about ``pacemaker`` and ``corosync``, refer to :ref:`setup_ha_mgmt_node_with_drbd_pacemaker_corosync`. | ||||
| @@ -24,19 +24,19 @@ The DB is SQLlite. There is no service node in this example. | ||||
| Prepare NFS server | ||||
| -------------------- | ||||
|  | ||||
| On the NFS server 10.2.2.44, execute the commands to export the file systems. If you want to use another non-root user to manage xCAT, such as hpcpeadmin,  | ||||
| On the NFS server 10.2.2.44, execute the commands to export the file systems. If you want to use another non-root user to manage xCAT, such as hpcpeadmin, | ||||
| you should create a directory for ``/home/hpcpeadmin``. Execute the commands on NFS server c902f02x44. :: | ||||
|  | ||||
|     # service nfs start | ||||
|     # mkdir ~/.xcat  | ||||
|     # mkdir ~/.xcat | ||||
|     # mkdir -p /etc/xcat | ||||
|     # mkdir -p /disk1/install/ | ||||
|     # mkdir -p /disk1/hpcpeadmin | ||||
|     # mkdir -p /disk1/install/xcat | ||||
|  | ||||
|     # vi /etc/exports  | ||||
|     /disk1/install *(rw,no_root_squash,sync,no_subtree_check)  | ||||
|     /etc/xcat *(rw,no_root_squash,sync,no_subtree_check)  | ||||
|     # vi /etc/exports | ||||
|     /disk1/install *(rw,no_root_squash,sync,no_subtree_check) | ||||
|     /etc/xcat *(rw,no_root_squash,sync,no_subtree_check) | ||||
|     /root/.xcat *(rw,no_root_squash,sync,no_subtree_check) | ||||
|     /root/.ssh *(rw,no_root_squash,sync,no_subtree_check) | ||||
|     /disk1/hpcpeadmin *(rw,no_root_squash,sync,no_subtree_check) | ||||
| @@ -75,12 +75,12 @@ Execute steps on xCAT MN rhmn1 | ||||
|  | ||||
|    Mount share nfs from 10.2.2.44: :: | ||||
|  | ||||
|     # mkdir -p /install  | ||||
|     # mkdir -p /install | ||||
|     # mkdir -p /etc/xcat | ||||
|     # mkdir -p /home/hpcpeadmin | ||||
|     # mount 10.2.2.44:/disk1/install /install | ||||
|     # mount 10.2.2.44:/etc/xcat /etc/xcat | ||||
|     # mkdir -p /root/.xcat  | ||||
|     # mkdir -p /root/.xcat | ||||
|     # mount 10.2.2.44:/root/.xcat /root/.xcat | ||||
|     # mount 10.2.2.44:/root/.ssh /root/.ssh | ||||
|     # mount 10.2.2.44:/disk1/hpcpeadmin /home/hpcpeadmin | ||||
| @@ -113,16 +113,16 @@ Execute steps on xCAT MN rhmn1 | ||||
|  | ||||
|    Download xcat-core tar ball and xcat-dep tar ball from github, and untar them: :: | ||||
|  | ||||
|     # mkdir /install/xcat  | ||||
|     # mv xcat-core-2.8.4.tar.bz2 /install/xcat/  | ||||
|     # mkdir /install/xcat | ||||
|     # mv xcat-core-2.8.4.tar.bz2 /install/xcat/ | ||||
|     # mv xcat-dep-201404250449.tar.bz2 /install/xcat/ | ||||
|     # cd /install/xcat  | ||||
|     # cd /install/xcat | ||||
|     # tar -jxvf xcat-core-2.8.4.tar.bz2 | ||||
|     # tar -jxvf xcat-dep-201404250449.tar.bz2 | ||||
|     # cd xcat-core | ||||
|     # ./mklocalrepo.sh | ||||
|     # cd ../xcat-dep/rh6/x86_64/ | ||||
|     # ./mklocalrepo.sh  | ||||
|     # ./mklocalrepo.sh | ||||
|     # yum clean metadata | ||||
|     # yum install xCAT | ||||
|     # source /etc/profile.d/xcat.sh | ||||
| @@ -232,7 +232,7 @@ Install corosync and pacemaker on both rhmn2 and rhmn1 | ||||
|     enabled=1 | ||||
|     gpgcheck=0 | ||||
|  | ||||
| #. Install ``corosync`` and ``pacemaker``, then generate ssh key:  | ||||
| #. Install ``corosync`` and ``pacemaker``, then generate ssh key: | ||||
|  | ||||
|    Install ``corosync`` and ``pacemaker``: :: | ||||
|  | ||||
| @@ -333,15 +333,15 @@ Be aware that you need to apply ALL the configuration at once. You cannot pick a | ||||
|  | ||||
|     Check that both rhmn1 and rhmn2 are in standby state now: :: | ||||
|  | ||||
|      rhmn1 ~]# crm status  | ||||
|      Last updated: Wed Aug 13 22:57:58 2014  | ||||
|      Last change: Wed Aug 13 22:40:31 2014 via cibadmin on rhmn1  | ||||
|      Stack: classic openais (with plugin)  | ||||
|      Current DC: rhmn2 - partition with quorum  | ||||
|      Version: 1.1.8-7.el6-394e906  | ||||
|      2 Nodes configured, 2 expected votes  | ||||
|      14 Resources configured.  | ||||
|      Node rhmn1: standby  | ||||
|      rhmn1 ~]# crm status | ||||
|      Last updated: Wed Aug 13 22:57:58 2014 | ||||
|      Last change: Wed Aug 13 22:40:31 2014 via cibadmin on rhmn1 | ||||
|      Stack: classic openais (with plugin) | ||||
|      Current DC: rhmn2 - partition with quorum | ||||
|      Version: 1.1.8-7.el6-394e906 | ||||
|      2 Nodes configured, 2 expected votes | ||||
|      14 Resources configured. | ||||
|      Node rhmn1: standby | ||||
|      Node rhmn2: standby | ||||
|  | ||||
|     Execute ``crm configure edit`` to add all the configuration at once: :: | ||||
| @@ -481,29 +481,29 @@ Verify auto fail over | ||||
| #. Let rhmn1 standby and rhmn2 online, xcat will run on rhmn2: :: | ||||
|  | ||||
|      rhmn2 /]# crm node online rhmn2 | ||||
|      rhmn2 /]# crm node standby rhmn1  | ||||
|      rhmn2 /]# crm status  | ||||
|      Last updated: Mon Aug 4 23:19:33 2014  | ||||
|      Last change: Mon Aug 4 23:19:40 2014 via crm_attribute on rhmn2  | ||||
|      Stack: classic openais (with plugin)  | ||||
|      Current DC: rhmn1 - partition with quorum  | ||||
|      Version: 1.1.8-7.el6-394e906  | ||||
|      2 Nodes configured, 2 expected votes  | ||||
|      12 Resources configured.  | ||||
|      rhmn2 /]# crm node standby rhmn1 | ||||
|      rhmn2 /]# crm status | ||||
|      Last updated: Mon Aug 4 23:19:33 2014 | ||||
|      Last change: Mon Aug 4 23:19:40 2014 via crm_attribute on rhmn2 | ||||
|      Stack: classic openais (with plugin) | ||||
|      Current DC: rhmn1 - partition with quorum | ||||
|      Version: 1.1.8-7.el6-394e906 | ||||
|      2 Nodes configured, 2 expected votes | ||||
|      12 Resources configured. | ||||
|  | ||||
|      Node rhmn1: standby  | ||||
|      Online: [ rhmn2 ]  | ||||
|      Node rhmn1: standby | ||||
|      Online: [ rhmn2 ] | ||||
|  | ||||
|      Resource Group: XCAT_GROUP  | ||||
|      xCATmnVIP (ocf::heartbeat:IPaddr2): Started rhmn2  | ||||
|      INSTALLFS (ocf::heartbeat:Filesystem): Started rhmn2  | ||||
|      ETCXCATFS (ocf::heartbeat:Filesystem): Started rhmn2  | ||||
|      ROOTXCATFS (ocf::heartbeat:Filesystem): Started rhmn2  | ||||
|      NFSlock_xCAT (lsb:nfslock): Started rhmn2  | ||||
|      xCAT (lsb:xcatd): Started rhmn2  | ||||
|      Clone Set: clone_named [named]  | ||||
|      Started: [ rhmn2 ]  | ||||
|      Stopped: [ named:1 ]  | ||||
|      Resource Group: XCAT_GROUP | ||||
|      xCATmnVIP (ocf::heartbeat:IPaddr2): Started rhmn2 | ||||
|      INSTALLFS (ocf::heartbeat:Filesystem): Started rhmn2 | ||||
|      ETCXCATFS (ocf::heartbeat:Filesystem): Started rhmn2 | ||||
|      ROOTXCATFS (ocf::heartbeat:Filesystem): Started rhmn2 | ||||
|      NFSlock_xCAT (lsb:nfslock): Started rhmn2 | ||||
|      xCAT (lsb:xcatd): Started rhmn2 | ||||
|      Clone Set: clone_named [named] | ||||
|      Started: [ rhmn2 ] | ||||
|      Stopped: [ named:1 ] | ||||
|  | ||||
|      rhmn2 /]#lsxcatd -v | ||||
|      Version 2.8.4 (git commit 7306ca8abf1c6d8c68d3fc3addc901c1bcb6b7b3, built Mon Apr 21 20:48:59 EDT 2014) | ||||
|   | ||||
| @@ -95,7 +95,7 @@ procedure to move its CNs over to the backup SN. | ||||
| Move the nodes to the new service nodes | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| Use the :doc:`snmove </guides/admin-guides/references/man1/snmove.1>` command to make the database changes necessary to move a set of compute nodes from one Service Node to another.  | ||||
| Use the :doc:`snmove </guides/admin-guides/references/man1/snmove.1>` command to make the database changes necessary to move a set of compute nodes from one Service Node to another. | ||||
|  | ||||
| To switch all the compute nodes from Service Node ``sn1`` to the backup Service Node ``sn2``, run: :: | ||||
|  | ||||
|   | ||||
| @@ -1,10 +1,10 @@ | ||||
| Appendix C: Migrating a Management Node to a Service Node | ||||
| ========================================================= | ||||
|  | ||||
| Directly converting an existing Management Node to a Service Node may have some issues and is not recommended.  Do the following steps to convert the xCAT Management Node into a Service node:  | ||||
| Directly converting an existing Management Node to a Service Node may have some issues and is not recommended.  Do the following steps to convert the xCAT Management Node into a Service node: | ||||
|  | ||||
| #. Back up your xCAT database on the Management Node | ||||
| #. Install a new xCAT Management node | ||||
| #. Restore your xCAT database into the new Management Node | ||||
| #. Re-provision the old xCAT Management Node as a new Service Node  | ||||
| #. Re-provision the old xCAT Management Node as a new Service Node | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ Add the defined nodes into the DHCP configuration, refer to: | ||||
| `XCAT_pLinux_Clusters/#configure-dhcp <http://localhost/fake_todo>`_ | ||||
|  | ||||
| In a large cluster, the size of the DHCP lease file "/var/lib/dhcpd/dhcpd.leases" on the DHCP server will grow over time. Once the file reaches around 100MB in size, the DHCP server will take a long time to respond to DHCP requests from clients and cause DHCP timeouts: :: | ||||
|   | ||||
|  | ||||
|    ... | ||||
|    Mar  2 01:59:10 c656ems2 dhcpd: DHCPDISCOVER from 00:0a:f7:73:7d:d0 via eth0 | ||||
|    Mar  2 01:59:10 c656ems2 dhcpd: DHCPOFFER on 9.114.39.101 to 00:0a:f7:73:7d:d0 via eth0 | ||||
|   | ||||
| @@ -21,7 +21,7 @@ where the /tmp/servicenodes contains a host per line: :: | ||||
|       10.%.%.% | ||||
|       node2.cluster.net | ||||
|  | ||||
| **While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation:  | ||||
| **While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation: | ||||
| `Manually set up MySQL <https://sourceforge.net/p/xcat/wiki/Setting_Up_MySQL_as_the_xCAT_DB/#configure-mysql-manually>`_ | ||||
|  | ||||
| .. _grante_revoke_mysql_access_label: | ||||
| @@ -36,7 +36,7 @@ Granting/Revoking access to the database for Service Node Clients | ||||
| * Granting access to the xCAT database.  Service Nodes are required for xCAT hierarchical support.  Compute nodes may also need access that depends on which application is going to run. (xcat201 is xcatadmin's password for following examples) :: | ||||
|  | ||||
|     MariaDB > GRANT ALL on xcatdb.* TO xcatadmin@<servicenode(s)> IDENTIFIED BY 'xcat201'; | ||||
|   | ||||
|  | ||||
|   Use wildcards to do a GRANT ALL for every IP address or node name that needs to access the database. :: | ||||
|  | ||||
|     MariaDB > GRANT ALL on xcatdb.* TO xcatadmin@'%.cluster.net' IDENTIFIED BY 'xcat201'; | ||||
| @@ -50,4 +50,4 @@ Granting/Revoking access to the database for Service Node Clients | ||||
|  | ||||
|    MariaDB > SELECT host, user FROM mysql.user; | ||||
|  | ||||
|    | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ The MySQL database is supported by xCAT since xCAT 2.1.  MariaDB is a fork of th | ||||
| | xCAT 2.10+ | Yes        | Yes        | | ||||
| +------------+------------+------------+ | ||||
|  | ||||
| MySQL/MariaDB packages are shipped as part of most Linux Distributions.  | ||||
| MySQL/MariaDB packages are shipped as part of most Linux Distributions. | ||||
|  | ||||
|  | ||||
| Red Hat Enterprise Linux | ||||
| @@ -55,12 +55,12 @@ Suse Linux Enterprise Server | ||||
|        libqt4-sql-mysql-* | ||||
|        libmysqlclient18-* | ||||
|        perl-DBD-mysql-* | ||||
|         | ||||
|  | ||||
| Debian/Ubuntu  | ||||
|  | ||||
| Debian/Ubuntu | ||||
| ------------- | ||||
|  | ||||
| * MySQL - Using ``apt-get``, ensure that the following packages are installed on the management node: ::  | ||||
| * MySQL - Using ``apt-get``, ensure that the following packages are installed on the management node: :: | ||||
|  | ||||
|         mysql-server | ||||
|         mysql-common | ||||
|   | ||||
| @@ -12,18 +12,18 @@ If you no longer want to use MySQL/MariaDB to maintain ``xcatdb``, and like to s | ||||
|  | ||||
|       XCATBYPASS=1 restorexCATdb -p ~/xcat-dbback | ||||
|  | ||||
| *  Change to PostgreSQL, following documentation: :doc:`/advanced/hierarchy/databases/postgres_install`  | ||||
| *  Change to PostgreSQL, following documentation: :doc:`/advanced/hierarchy/databases/postgres_install` | ||||
|  | ||||
|  | ||||
| *  Change back to default xCAT database, SQLite (**Note**:  xCAT Hierarchy cluster will no longer work) | ||||
|  | ||||
|   #. Stop the ``xcatd`` daemon on the management node. ::  | ||||
|   #. Stop the ``xcatd`` daemon on the management node. :: | ||||
|  | ||||
|       service xcatd stop | ||||
|  | ||||
|   #. Remove the ``xcatdb`` from MySQL/MariaDB (optional): ::  | ||||
|   #. Remove the ``xcatdb`` from MySQL/MariaDB (optional): :: | ||||
|  | ||||
|       /usr/bin/mysql -u root -p  | ||||
|       /usr/bin/mysql -u root -p | ||||
|  | ||||
|      drop the xcatdb: :: | ||||
|  | ||||
| @@ -34,10 +34,10 @@ If you no longer want to use MySQL/MariaDB to maintain ``xcatdb``, and like to s | ||||
|        mysql> drop user xcatadm; | ||||
|  | ||||
|   #. Move, or remove, the  ``/etc/xcat/cfgloc`` file as it points xCAT to MySQL/MariaDB.  (without this file, xCAT defaults to SQLite): :: | ||||
|     | ||||
|       rm /etc/xcat/cfgloc  | ||||
|  | ||||
|       rm /etc/xcat/cfgloc | ||||
|  | ||||
|   #. Restart ``xcatd``: :: | ||||
|  | ||||
|       service xcatd start  | ||||
|       service xcatd start | ||||
|  | ||||
|   | ||||
| @@ -20,17 +20,17 @@ Start/Stop MySQL/MariaDB service | ||||
|     service mysql stop | ||||
|  | ||||
|  | ||||
| Basic MySQL/MariaDB commands  | ||||
| Basic MySQL/MariaDB commands | ||||
| ----------------------------- | ||||
|  | ||||
| Refer to `<https://www.mariadb.org/>`_ for the latest documentation. | ||||
|  | ||||
| * Using ``mysql``, connect to the xcat database:  :: | ||||
|     | ||||
|  | ||||
|     mysql -u root -p | ||||
|  | ||||
| * List the hosts and users which are managed by this xcat MN: :: | ||||
|     | ||||
|  | ||||
|     MariaDB> SELECT host, user FROM mysql.user; | ||||
|  | ||||
| * List the databases: :: | ||||
|   | ||||
| @@ -8,13 +8,13 @@ A utility is provided to migrate an existing xCAT database from SQLite to Postgr | ||||
|  | ||||
|     pgsqlsetup -i -V | ||||
|  | ||||
| **While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation:  | ||||
| **While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation: | ||||
| `Manually set up PostgreSQL <https://sourceforge.net/p/xcat/wiki/Setting_Up_PostgreSQL_as_the_xCAT_DB/#manually-setup-postgresql>`_ | ||||
|  | ||||
| Setting up the Service Nodes  | ||||
| Setting up the Service Nodes | ||||
| ---------------------------- | ||||
|  | ||||
| For service nodes, add the IP address of each service nodes to the postgres configuration file: ``/var/lib/pgsql/data/pg_hba.conf``   | ||||
| For service nodes, add the IP address of each service nodes to the postgres configuration file: ``/var/lib/pgsql/data/pg_hba.conf`` | ||||
|  | ||||
| If you had the following two service nodes: :: | ||||
|  | ||||
| @@ -29,10 +29,10 @@ You would add the following to ``/var/lib/pgsql/data/pg_hba.conf`` :: | ||||
|  | ||||
| Restart PostgreSQL after editing the file: :: | ||||
|  | ||||
|     service postgresql restart  | ||||
|     service postgresql restart | ||||
|  | ||||
|  | ||||
| For more information about changing the ``pg_hba.conf`` file and ``postgresql.conf`` files, see the following documentation:  | ||||
| For more information about changing the ``pg_hba.conf`` file and ``postgresql.conf`` files, see the following documentation: | ||||
| `Setup the PostgreSQL Configuration Files <https://sourceforge.net/p/xcat/wiki/Setting_Up_PostgreSQL_as_the_xCAT_DB/#setup-the-postgresql-configuration-files>`_ | ||||
|  | ||||
| .. _modify_postgresql_database_diretory: | ||||
|   | ||||
| @@ -16,7 +16,7 @@ Using yum, install the following rpms: :: | ||||
| Suse Linux Enterprise Server | ||||
| ---------------------------- | ||||
|  | ||||
| **Note:** On SLES, ``perl-DBD`` packages are provided on the SDK iso images.  | ||||
| **Note:** On SLES, ``perl-DBD`` packages are provided on the SDK iso images. | ||||
|  | ||||
| Using zypper, install the following rpms: :: | ||||
|  | ||||
| @@ -24,7 +24,7 @@ Using zyppr, install the following rpms: :: | ||||
|     zypper install perl-DBD-Pg | ||||
|  | ||||
|  | ||||
| Debian/Ubuntu  | ||||
| Debian/Ubuntu | ||||
| ------------- | ||||
|  | ||||
| Using apt, install the following packages: :: | ||||
|   | ||||
| @@ -8,12 +8,12 @@ To remove ``xcatdb`` completely from the PostgreSQL database: | ||||
|       mkdir -p ~/xcat-dbback | ||||
|       dumpxCATdb -p ~/xcat-dbback | ||||
|  | ||||
| #. Stop the ``xcatd`` daemon on the management node.   | ||||
| #. Stop the ``xcatd`` daemon on the management node. | ||||
|    **Note:** If you are using *xCAT Hierarchy (service nodes)* and removing ``xcatdb`` from postgres, hierarchy will no longer work. You will need to configure another database which supports remote database access to continue using the hierarchy feature. :: | ||||
|  | ||||
|       service xcatd stop | ||||
|  | ||||
| #. Remove the ``xcatdb`` from PostgreSQL: ::  | ||||
| #. Remove the ``xcatdb`` from PostgreSQL: :: | ||||
|  | ||||
|       su - postgres | ||||
|  | ||||
| @@ -31,7 +31,7 @@ To remove ``xcatdb`` completely from the PostgreSQL database: | ||||
|       rm -rf * | ||||
|  | ||||
| #. Move, or remove, the  ``/etc/xcat/cfgloc`` file as it points xCAT to PostgreSQL.  (without this file, xCAT defaults to SQLite): :: | ||||
|     | ||||
|  | ||||
|       mv /etc/xcat/cfgloc /etc/xcat/cfglog.postgres | ||||
|  | ||||
| #. Restore the PostgreSQL database into SQLite: :: | ||||
| @@ -40,5 +40,5 @@ To remove ``xcatdb`` completely from the PostgreSQL database: | ||||
|  | ||||
| #. Restart ``xcatd``: :: | ||||
|  | ||||
|       service xcatd start  | ||||
|       service xcatd start | ||||
|  | ||||
|   | ||||
| @@ -9,13 +9,13 @@ Using PostgreSQL | ||||
| Use the psql command line utility to connect to the PostgreSQL database: :: | ||||
|  | ||||
|     su - postgres | ||||
|     psql -h <hostname> -U xcatadm -d xcatdb  | ||||
|     psql -h <hostname> -U xcatadm -d xcatdb | ||||
|  | ||||
|  | ||||
| Useful Commands | ||||
| --------------- | ||||
|  | ||||
| * Show create statement for a table, for example prescripts table. ::  | ||||
| * Show create statement for a table, for example prescripts table. :: | ||||
|  | ||||
|     /usr/bin/pg_dump xcatdb -U xcatadm -t prescripts | ||||
|  | ||||
| @@ -26,7 +26,7 @@ Useful Commands | ||||
|     # drop the xcatdb | ||||
|     dropdb xcatdb | ||||
|  | ||||
|     # remove the xcatadm database owner  | ||||
|     # remove the xcatadm database owner | ||||
|     dropuser xcatadm | ||||
|  | ||||
|     # clean up the postgresql files (necessary if you want to re-create the database) | ||||
| @@ -38,7 +38,7 @@ Useful Commands | ||||
|     su - postgres | ||||
|     psql -l | ||||
|  | ||||
| * Access the database: ::  | ||||
| * Access the database: :: | ||||
|  | ||||
|     su - postgres | ||||
|     psql xcatdb | ||||
|   | ||||
| @@ -12,8 +12,8 @@ Using ``psql``, connect to the xcat database: :: | ||||
| list the xCAT tables: :: | ||||
|  | ||||
|       xcatdb=> \dt | ||||
|   | ||||
| show the entries in the nodelist table: ::  | ||||
|  | ||||
| show the entries in the nodelist table: :: | ||||
|  | ||||
|       xcatdb=> select * from nodelist; | ||||
|  | ||||
| @@ -29,7 +29,7 @@ Show the SQL create statement for a table: :: | ||||
|  | ||||
|       /usr/bin/pg_dump xcatdb -U xcatadm -t <table_name> | ||||
|  | ||||
|       # example, for prescripts table:  | ||||
|       # example, for prescripts table: | ||||
|       /usr/bin/pg_dump xcatdb -U xcatadm -t prescripts | ||||
|  | ||||
| List all databases in postgres: :: | ||||
|   | ||||
| @@ -4,16 +4,16 @@ Define and install your Compute Nodes | ||||
| Make /install available on the Service Nodes | ||||
| -------------------------------------------- | ||||
|  | ||||
| Note that all of the files and directories pointed to by your osimages should  | ||||
| be placed under the directory referred to in site.installdir (usually  | ||||
| /install), so they will be available to the service nodes. The installdir  | ||||
| directory is mounted or copied to the service nodes during the hierarchical  | ||||
| Note that all of the files and directories pointed to by your osimages should | ||||
| be placed under the directory referred to in site.installdir (usually | ||||
| /install), so they will be available to the service nodes. The installdir | ||||
| directory is mounted or copied to the service nodes during the hierarchical | ||||
| installation. | ||||
|  | ||||
| If you are not using the NFS-based statelite method of booting your compute  | ||||
| nodes and you are not using service node pools, set the installloc attribute  | ||||
| to "/install". This instructs the service node to mount /install from the  | ||||
| management node. (If you don't do this, you have to manually sync /install  | ||||
| If you are not using the NFS-based statelite method of booting your compute | ||||
| nodes and you are not using service node pools, set the installloc attribute | ||||
| to "/install". This instructs the service node to mount /install from the | ||||
| management node. (If you don't do this, you have to manually sync /install | ||||
| between the management node and the service nodes.) | ||||
|  | ||||
| :: | ||||
| @@ -23,17 +23,17 @@ between the management node and the service nodes.) | ||||
| Make compute node syncfiles available on the servicenodes | ||||
| --------------------------------------------------------- | ||||
|  | ||||
| If you are not using the NFS-based statelite method of booting your compute  | ||||
| nodes, and you plan to use the syncfiles postscript to update files on the  | ||||
| nodes during install, you must ensure that those files are sync'd to the  | ||||
| servicenodes before the install of the compute nodes. To do this after your  | ||||
| nodes are defined, you will need to run the following whenever the files in  | ||||
| If you are not using the NFS-based statelite method of booting your compute | ||||
| nodes, and you plan to use the syncfiles postscript to update files on the | ||||
| nodes during install, you must ensure that those files are sync'd to the | ||||
| servicenodes before the install of the compute nodes. To do this after your | ||||
| nodes are defined, you will need to run the following whenever the files in | ||||
| your synclist change on the Management Node: | ||||
| :: | ||||
|  | ||||
|   updatenode <computenoderange> -f | ||||
|  | ||||
| At this point you can return to the documentation for your cluster environment  | ||||
| At this point you can return to the documentation for your cluster environment | ||||
| to define and deploy your compute nodes. | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -16,8 +16,8 @@ group called **service**. Some of the commands in this document will use the | ||||
| group **service** to update all service nodes. | ||||
|  | ||||
| Note: a Service Node's service node is the Management Node; so a service node | ||||
| must have a direct connection to the management node. The compute nodes do not  | ||||
| have to be directly attached to the Management Node, only to their service  | ||||
| must have a direct connection to the management node. The compute nodes do not | ||||
| have to be directly attached to the Management Node, only to their service | ||||
| node. This will all have to be defined in your networks table. | ||||
|  | ||||
| Add Service Nodes to the nodelist Table | ||||
| @@ -25,11 +25,11 @@ Add Service Nodes to the nodelist Table | ||||
|  | ||||
| Define your service nodes (if not defined already), and by convention we put | ||||
| them in a **service** group. We usually have a group compute for our compute | ||||
| nodes, to distinguish between the two types of nodes. (If you want to use your  | ||||
| own group name for service nodes, rather than service, you need to change some  | ||||
| defaults in the xCAT db that use the group name service. For example, in the  | ||||
| postscripts table there is by default a group entry for service, with the  | ||||
| appropriate postscripts to run when installing a service node. Also, the  | ||||
| nodes, to distinguish between the two types of nodes. (If you want to use your | ||||
| own group name for service nodes, rather than service, you need to change some | ||||
| defaults in the xCAT db that use the group name service. For example, in the | ||||
| postscripts table there is by default a group entry for service, with the | ||||
| appropriate postscripts to run when installing a service node. Also, the | ||||
| default ``kickstart/autoyast`` template, pkglist, etc that will be used have | ||||
| file names based on the profile name service.) :: | ||||
|  | ||||
| @@ -44,7 +44,7 @@ appropriate for SNs. Display the list of osimages and choose one with | ||||
|  | ||||
|    lsdef -t osimage | ||||
|  | ||||
| For this example, let's assume you chose the stateful osimage definition for  | ||||
| For this example, let's assume you chose the stateful osimage definition for | ||||
| rhels 7: rhels7-x86_64-install-service . If you want to modify any of the | ||||
| osimage attributes (e.g. ``kickstart/autoyast`` template, pkglist, etc), | ||||
| make a copy of the osimage definition and also copy to ``/install/custom`` | ||||
| @@ -63,16 +63,16 @@ Now set some of the common attributes for the SNs at the group level: :: | ||||
| Add Service Nodes to the servicenode Table | ||||
| ------------------------------------------ | ||||
|  | ||||
| An entry must be created in the servicenode table for each service node or the  | ||||
| service group. This table describes all the services you would like xcat to  | ||||
| setup on the service nodes. (Even if you don't want xCAT to set up any  | ||||
| services - unlikely - you must define the service nodes in the servicenode  | ||||
| table with at least one attribute set (you can set it to 0), otherwise it will  | ||||
| An entry must be created in the servicenode table for each service node or the | ||||
| service group. This table describes all the services you would like xcat to | ||||
| setup on the service nodes. (Even if you don't want xCAT to set up any | ||||
| services - unlikely - you must define the service nodes in the servicenode | ||||
| table with at least one attribute set (you can set it to 0), otherwise it will | ||||
| not be recognized as a service node.) | ||||
|  | ||||
| When the xcatd daemon is started or restarted on the service node, it will  | ||||
| make sure all of the requested services are configured and started. (To  | ||||
| temporarily avoid this when restarting xcatd, use "service xcatd reload"  | ||||
| When the xcatd daemon is started or restarted on the service node, it will | ||||
| make sure all of the requested services are configured and started. (To | ||||
| temporarily avoid this when restarting xcatd, use "service xcatd reload" | ||||
| instead.) | ||||
|  | ||||
| To set up the minimum recommended services on the service nodes: :: | ||||
| @@ -138,9 +138,9 @@ value as the node's servicenode attribute. | ||||
|  | ||||
| Host name resolution must have been setup in advance, with ``/etc/hosts``, DNS | ||||
| or dhcp to ensure that the names put in this table can be resolved on the | ||||
| Management Node, Service nodes, and the compute nodes. It is easiest to have a  | ||||
| node group of the compute nodes for each service node. For example, if all the  | ||||
| nodes in node group compute1 are serviced by sn1 and all the nodes in node  | ||||
| Management Node, Service nodes, and the compute nodes. It is easiest to have a | ||||
| node group of the compute nodes for each service node. For example, if all the | ||||
| nodes in node group compute1 are serviced by sn1 and all the nodes in node | ||||
| group compute2 are serviced by sn2: | ||||
|  | ||||
| :: | ||||
| @@ -148,18 +148,18 @@ group compute2 are serviced by sn2: | ||||
|   chdef -t group compute1 servicenode=sn1 xcatmaster=sn1-c | ||||
|   chdef -t group compute2 servicenode=sn2 xcatmaster=sn2-c | ||||
|  | ||||
| Note: in this example, sn1 and sn2 are the node names of the service nodes  | ||||
| (and therefore the hostnames associated with the NICs that the MN talks to).  | ||||
| The hostnames sn1-c and sn2-c are associated with the SN NICs that communicate  | ||||
| Note: in this example, sn1 and sn2 are the node names of the service nodes | ||||
| (and therefore the hostnames associated with the NICs that the MN talks to). | ||||
| The hostnames sn1-c and sn2-c are associated with the SN NICs that communicate | ||||
| with their compute nodes. | ||||
|  | ||||
| Note: if not set, the attribute tftpserver's default value is xcatmaster, | ||||
| but in some releases of xCAT it has not defaulted correctly, so it is safer | ||||
| to set the tftpserver to the value of xcatmaster. | ||||
|  | ||||
| These attributes will allow you to specify which service node should run the  | ||||
| conserver (console) and monserver (monitoring) daemon for the nodes in the  | ||||
| group specified in the command. In this example, we are having each node's  | ||||
| These attributes will allow you to specify which service node should run the | ||||
| conserver (console) and monserver (monitoring) daemon for the nodes in the | ||||
| group specified in the command. In this example, we are having each node's | ||||
| primary SN also act as its conserver and monserver (the most typical setup). | ||||
| :: | ||||
|  | ||||
| @@ -169,43 +169,43 @@ primary SN also act as its conserver and monserver (the most typical setup). | ||||
| Service Node Pools | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| Service Node Pools are multiple service nodes that service the same set of  | ||||
| compute nodes. Having multiple service nodes allows backup service node(s) for  | ||||
| a compute node when the primary service node is unavailable, or can be used  | ||||
| for work-load balancing on the service nodes. But note that the selection of  | ||||
| which SN will service which compute node is made at compute node boot time.  | ||||
| After that, the selection of the SN for this compute node is fixed until the  | ||||
| compute node is rebooted or the compute node is explicitly moved to another SN  | ||||
| Service Node Pools are multiple service nodes that service the same set of | ||||
| compute nodes. Having multiple service nodes allows backup service node(s) for | ||||
| a compute node when the primary service node is unavailable, or can be used | ||||
| for work-load balancing on the service nodes. But note that the selection of | ||||
| which SN will service which compute node is made at compute node boot time. | ||||
| After that, the selection of the SN for this compute node is fixed until the | ||||
| compute node is rebooted or the compute node is explicitly moved to another SN | ||||
| using the `snmove <http://localhost/fake_todo>`_  command. | ||||
|  | ||||
| To use Service Node pools, you need to architect your network such that all of  | ||||
| the compute nodes and service nodes in a particular pool are on the same flat  | ||||
| To use Service Node pools, you need to architect your network such that all of | ||||
| the compute nodes and service nodes in a particular pool are on the same flat | ||||
| network. If you don't want the management node to respond to manage some of | ||||
| the compute nodes, it shouldn't be on that same flat network. The  | ||||
| the compute nodes, it shouldn't be on that same flat network. The | ||||
| site, dhcpinterfaces attribute should be set such that the SNs' DHCP daemon | ||||
| only listens on the NIC that faces the compute nodes, not the NIC that faces  | ||||
| the MN. This avoids some timing issues when the SNs are being deployed (so  | ||||
| that they don't respond to each other before they are completely ready). You  | ||||
| only listens on the NIC that faces the compute nodes, not the NIC that faces | ||||
| the MN. This avoids some timing issues when the SNs are being deployed (so | ||||
| that they don't respond to each other before they are completely ready). You | ||||
| also need to make sure the `networks <http://localhost/fake_todo>`_ table | ||||
| accurately reflects the physical network structure. | ||||
|  | ||||
| To define a list of service nodes that support a set of compute nodes, set the  | ||||
| servicenode attribute to a comma-delimited list of the service nodes. When  | ||||
| running an xCAT command like xdsh or updatenode for compute nodes, the list  | ||||
| will be processed left to right, picking the first service node on the list to  | ||||
| run the command. If that service node is not available, then the next service  | ||||
| node on the list will be chosen until the command is successful. Errors will  | ||||
| be logged. If no service node on the list can process the command, then the  | ||||
| error will be returned. You can provide some load-balancing by assigning your  | ||||
| To define a list of service nodes that support a set of compute nodes, set the | ||||
| servicenode attribute to a comma-delimited list of the service nodes. When | ||||
| running an xCAT command like xdsh or updatenode for compute nodes, the list | ||||
| will be processed left to right, picking the first service node on the list to | ||||
| run the command. If that service node is not available, then the next service | ||||
| node on the list will be chosen until the command is successful. Errors will | ||||
| be logged. If no service node on the list can process the command, then the | ||||
| error will be returned. You can provide some load-balancing by assigning your | ||||
| service nodes as we do below. | ||||
|  | ||||
| When using service node pools, the intent is to have the service node that  | ||||
| responds first to the compute node's DHCP request during boot also be the  | ||||
| xcatmaster, the tftpserver, and the NFS/http server for that node. Therefore,  | ||||
| the xcatmaster and nfsserver attributes for nodes should not be set. When  | ||||
| nodeset is run for the compute nodes, the service node interface on the  | ||||
| network to the compute nodes should be defined and active, so that nodeset  | ||||
| will default those attribute values to the "node ip facing" interface on that  | ||||
| When using service node pools, the intent is to have the service node that | ||||
| responds first to the compute node's DHCP request during boot also be the | ||||
| xcatmaster, the tftpserver, and the NFS/http server for that node. Therefore, | ||||
| the xcatmaster and nfsserver attributes for nodes should not be set. When | ||||
| nodeset is run for the compute nodes, the service node interface on the | ||||
| network to the compute nodes should be defined and active, so that nodeset | ||||
| will default those attribute values to the "node ip facing" interface on that | ||||
| service node. | ||||
|  | ||||
| For example: :: | ||||
| @@ -213,7 +213,7 @@ For example: :: | ||||
|   chdef -t node compute1 servicenode=sn1,sn2 xcatmaster="" nfsserver="" | ||||
|   chdef -t node compute2 servicenode=sn2,sn1 xcatmaster="" nfsserver="" | ||||
|  | ||||
| You need to set the sharedtftp site attribute to 0 so that the SNs will not  | ||||
| You need to set the sharedtftp site attribute to 0 so that the SNs will not | ||||
| automatically mount the ``/tftpboot`` directory from the management node: | ||||
| :: | ||||
|  | ||||
| @@ -233,8 +233,8 @@ from this rsync. | ||||
|  | ||||
|   rsync -auv --exclude 'autoinst' /install sn1:/ | ||||
|  | ||||
| Note: If your service nodes are stateless and site.sharedtftp=0, if you reboot  | ||||
| any service node when using servicenode pools, any data written to the local  | ||||
| Note: If your service nodes are stateless and site.sharedtftp=0, if you reboot | ||||
| any service node when using servicenode pools, any data written to the local | ||||
| ``/tftpboot`` directory of that SN is lost. You will need to run nodeset for | ||||
| all of the compute nodes serviced by that SN again. | ||||
|  | ||||
| @@ -244,14 +244,14 @@ networks table, see ref: networks table, see :ref:`setup_networks_table_label`. | ||||
| Conserver and Monserver and Pools | ||||
| """"""""""""""""""""""""""""""""" | ||||
|  | ||||
| The support of conserver and monserver with Service Node Pools is still not  | ||||
| supported. You must explicitly assign these functions to a service node using  | ||||
| Conserver and monserver are not yet supported with Service Node Pools. You | ||||
| must explicitly assign these functions to a service node using the | ||||
| nodehm.conserver and noderes.monserver attributes as above. | ||||
|  | ||||
| Setup Site Table | ||||
| ---------------- | ||||
|  | ||||
| If you are not using the NFS-based statelite method of booting your compute  | ||||
| If you are not using the NFS-based statelite method of booting your compute | ||||
| nodes, set the installloc attribute to ``/install``. This instructs the | ||||
| service node to mount ``/install`` from the management node. (If you don't do | ||||
| this, you have to manually sync ``/install`` between the management node and | ||||
| @@ -259,11 +259,11 @@ the service nodes.) :: | ||||
|  | ||||
|   chdef -t site  clustersite installloc="/install" | ||||
|  | ||||
| For IPMI controlled nodes, if you want the out-of-band IPMI operations to be  | ||||
| done directly from the management node (instead of being sent to the  | ||||
| For IPMI controlled nodes, if you want the out-of-band IPMI operations to be | ||||
| done directly from the management node (instead of being sent to the | ||||
| appropriate service node), set site.ipmidispatch=n. | ||||
|  | ||||
| If you want to throttle the rate at which nodes are booted up, you can set the  | ||||
| If you want to throttle the rate at which nodes are booted up, you can set the | ||||
| following site attributes: | ||||
|  | ||||
|  | ||||
| @@ -278,7 +278,7 @@ See the `site table man page <http://localhost/fack_todo>`_ for details. | ||||
| Setup networks Table | ||||
| -------------------- | ||||
|  | ||||
| All networks in the cluster must be defined in the networks table. When xCAT  | ||||
| All networks in the cluster must be defined in the networks table. When xCAT | ||||
| is installed, it runs makenetworks, which creates an entry in the networks | ||||
| table for each of the networks the management node is on. You need to add | ||||
| entries for each network the service nodes use to communicate to the compute | ||||
| @@ -288,22 +288,22 @@ For example: :: | ||||
|  | ||||
|   mkdef -t network net1 net=10.5.1.0 mask=255.255.255.224 gateway=10.5.1.1 | ||||
|  | ||||
| If you want to set the nodes' xcatmaster as the default gateway for the nodes,  | ||||
| the gateway attribute can be set to keyword "<xcatmaster>". In this case, xCAT  | ||||
| code will automatically substitute the IP address of the node's xcatmaster for  | ||||
| If you want to set the nodes' xcatmaster as the default gateway for the nodes, | ||||
| the gateway attribute can be set to keyword "<xcatmaster>". In this case, xCAT | ||||
| code will automatically substitute the IP address of the node's xcatmaster for | ||||
| the keyword. Here is an example: | ||||
| :: | ||||
|  | ||||
|   mkdef -t network net1 net=10.5.1.0 mask=255.255.255.224 gateway=<xcatmaster> | ||||
|  | ||||
| The ipforward attribute should be enabled on all the xcatmaster nodes that  | ||||
| will be acting as default gateways. You can set ipforward to 1 in the  | ||||
| servicenode table or add the line "net.ipv4.ip_forward = 1" in file  | ||||
| The ipforward attribute should be enabled on all the xcatmaster nodes that | ||||
| will be acting as default gateways. You can set ipforward to 1 in the | ||||
| servicenode table or add the line "net.ipv4.ip_forward = 1" in file | ||||
| ``/etc/sysctl.conf`` and then run "sysctl -p /etc/sysctl.conf" manually to | ||||
| enable the ipforwarding. | ||||
|  | ||||
| Note:If using service node pools, the networks table dhcpserver attribute can  | ||||
| be set to any single service node in your pool. The networks tftpserver, and  | ||||
| Note: If using service node pools, the networks table dhcpserver attribute can | ||||
| be set to any single service node in your pool. The networks tftpserver, and | ||||
| nameserver attributes should be left blank. | ||||
|  | ||||
| Verify the Tables | ||||
| @@ -317,17 +317,17 @@ compute1, compute2: :: | ||||
| Add additional adapters configuration script (optional) | ||||
| ------------------------------------------------------------ | ||||
|  | ||||
| It is possible to have additional adapter interfaces automatically configured  | ||||
| when the nodes are booted. XCAT provides sample configuration scripts for  | ||||
| ethernet, IB, and HFI adapters. These scripts can be used as-is or they can be  | ||||
| modified to suit your particular environment. The ethernet sample is  | ||||
| It is possible to have additional adapter interfaces automatically configured | ||||
| when the nodes are booted. xCAT provides sample configuration scripts for | ||||
| ethernet, IB, and HFI adapters. These scripts can be used as-is or they can be | ||||
| modified to suit your particular environment. The ethernet sample is | ||||
| ``/install/postscripts/configeth``. When you have the configuration script that | ||||
| you want you can add it to the "postscripts" attribute as mentioned above. Make | ||||
| sure your script is in the ``/install/postscripts`` directory and that it is | ||||
| executable. | ||||
|  | ||||
| Note: For system p servers, if you plan to have your service node perform the  | ||||
| hardware control functions for its compute nodes, it is necessary that the SN  | ||||
| Note: For system p servers, if you plan to have your service node perform the | ||||
| hardware control functions for its compute nodes, it is necessary that the SN | ||||
| ethernet network adapters connected to the HW service VLAN be configured. | ||||
|  | ||||
| Configuring Secondary Adapters | ||||
|   | ||||
| @@ -3,7 +3,7 @@ Define Service Nodes | ||||
|  | ||||
| This next part shows how to configure a xCAT Hierarchy and provision xCAT service nodes from an existing xCAT cluster. | ||||
|  | ||||
| *The document assumes that the compute nodes that are part of your cluster have already been defined into the xCAT database and you have successfully provisioned the compute nodes using xCAT*  | ||||
| *The document assumes that the compute nodes that are part of your cluster have already been defined into the xCAT database and you have successfully provisioned the compute nodes using xCAT* | ||||
|  | ||||
|  | ||||
| The following table illustrates the cluster being used in this example: | ||||
| @@ -33,8 +33,8 @@ The following table illustrates the cluster being used in this example: | ||||
|  | ||||
|        chdef -t site hierarchicalattrs="postscripts" | ||||
|  | ||||
| #. Select the compute nodes that will become service nodes  | ||||
|       | ||||
| #. Select the compute nodes that will become service nodes | ||||
|  | ||||
|         The first node in each rack, ``r1n01`` and ``r2n01``, is selected to become the xCAT service nodes and manage the compute nodes in that rack | ||||
|  | ||||
|  | ||||
| @@ -53,7 +53,7 @@ The following table illustrates the cluster being used in this example: | ||||
|  | ||||
|         chdef -t group -o service setupnfs=1 \ | ||||
|                                   setupdhcp=1 \ | ||||
|                                   setuptftp=1 \  | ||||
|                                   setuptftp=1 \ | ||||
|                                   setupnameserver=1 \ | ||||
|                                   setupconserver=2 | ||||
|  | ||||
| @@ -63,26 +63,26 @@ The following table illustrates the cluster being used in this example: | ||||
|       * For clusters with subnetted management networks, you might want to set ``setupipforward=1`` | ||||
|       * For the ``setupconserver`` attribute, if ``conserver`` is used, set to ``1``, if ``goconserver`` is used, set to ``2`` | ||||
|  | ||||
| #. Add additional postscripts for Service Nodes (optional)  | ||||
| #. Add additional postscripts for Service Nodes (optional) | ||||
|  | ||||
|    By default, xCAT will execute the ``servicenode`` postscript when installed or diskless booted.  This postscript will set up the necessary credentials and install the xCAT software on the Service Nodes.  If you have additional postscripts that you want to execute on the service nodes, copy them to ``/install/postscripts`` and run the following: :: | ||||
|  | ||||
|         chdef -t group -o service -p postscripts=<mypostscript> | ||||
|  | ||||
| #. Assigning Compute Nodes to their Service Nodes  | ||||
| #. Assigning Compute Nodes to their Service Nodes | ||||
|  | ||||
|    The node attributes ``servicenode`` and ``xcatmaster``, define which Service node will serve the particular compute node. | ||||
|  | ||||
|    The node attributes ``servicenode`` and ``xcatmaster``, define which Service node will serve the particular compute node.  | ||||
|     | ||||
|    * ``servicenode`` - defines which Service Node the **Management Node** should send commands to (e.g ``xdsh``) and should be set to the hostname or IP address of the service node that the management node can contact it by. | ||||
|    * ``xcatmaster`` - defines which Service Node the **Compute Node** should boot from and should be set to the hostname or IP address of the service node that the compute node can contact it by. | ||||
|  | ||||
|    You must set both ``servicenode`` and ``xcatmaster`` regardless of whether or not you are using service node pools, for most scenarios, the value will be identical. :: | ||||
|  | ||||
|         chdef -t group -o rack1 servicenode=r1n01 xcatmaster=r1n01  | ||||
|         chdef -t group -o rack1 servicenode=r1n01 xcatmaster=r1n01 | ||||
|         chdef -t group -o rack2 servicenode=r2n01 xcatmaster=r2n01 | ||||
|  | ||||
| #. Set the conserver and monserver attributes | ||||
|   | ||||
|  | ||||
|    Set which service node should run the conserver (console) and monserver (monitoring) daemon for the nodes in the group. The most typical setup is to have the service node also act as its conserver and monserver. :: | ||||
|  | ||||
|         chdef -t group -o rack1 conserver=r1n01 monserver=r1n01 | ||||
| @@ -101,7 +101,7 @@ The following table illustrates the cluster being used in this example: | ||||
|  | ||||
|          chdef -t site clustersite sharedtftp=0 | ||||
|          chdef -t site clustersite installloc= | ||||
|          rsync -auv --exclude 'autoinst' /install r1n01:/  | ||||
|          rsync -auv --exclude 'autoinst' /install r2n01:/  | ||||
|          rsync -auv --exclude 'autoinst' /tftpboot r1n01:/  | ||||
|          rsync -auv --exclude 'autoinst' /tftpboot r2n01:/  | ||||
|          rsync -auv --exclude 'autoinst' /install r1n01:/ | ||||
|          rsync -auv --exclude 'autoinst' /install r2n01:/ | ||||
|          rsync -auv --exclude 'autoinst' /tftpboot r1n01:/ | ||||
|          rsync -auv --exclude 'autoinst' /tftpboot r2n01:/ | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| Hierarchical Clusters / Large Cluster Support | ||||
| ============================================= | ||||
|  | ||||
| xCAT supports management of very large sized cluster by creating a **Hierarchical Cluster** and the concept of **xCAT Service Nodes**.   | ||||
| xCAT supports management of very large clusters by creating a **Hierarchical Cluster** and the concept of **xCAT Service Nodes**. | ||||
|  | ||||
| When dealing with large clusters, to balance the load, it is recommended to have more than one node (Management Node, "MN") handling the installation and management of the Compute Nodes ("CN").  These additional *helper* nodes are referred to as **Service Nodes** ("SN").  The Management Node can delegate all management operational needs to the Service Node responsible for a set of compute nodes. | ||||
|  | ||||
|   | ||||
| @@ -11,7 +11,7 @@ Any cluster using statelite compute nodes must use a stateful (diskful) Service | ||||
| Configure ``otherpkgdir`` and ``otherpkglist`` for service node osimage | ||||
| ----------------------------------------------------------------------- | ||||
|  | ||||
|  * Create a subdirectory ``xcat`` under a path specified by ``otherpkgdir`` attribute of the service node os image, selected during the :doc:`../define_service_nodes` step.  | ||||
|  * Create a subdirectory ``xcat`` under a path specified by ``otherpkgdir`` attribute of the service node os image, selected during the :doc:`../define_service_nodes` step. | ||||
|  | ||||
|    For example, for osimage *rhels7-x86_64-install-service* :: | ||||
|  | ||||
| @@ -114,17 +114,17 @@ Watch the installation progress using either wcons or rcons: :: | ||||
| Update Service Node Diskful Image | ||||
| --------------------------------- | ||||
|  | ||||
| To update the xCAT software on the Service Node:  | ||||
| To update the xCAT software on the Service Node: | ||||
|  | ||||
| #. Remove previous xcat-core, xcat-dep, and tar files in the NFS mounted ``/install/post/otherpkgs/`` directory: :: | ||||
|      | ||||
|  | ||||
|     rm /install/post/otherpkgs/<os>/<arch>/xcat/xcat-core | ||||
|     rm /install/post/otherpkgs/<os>/<arch>/xcat/xcat-dep | ||||
|     rm /install/post/otherpkgs/<os>/<arch>/xcat/<xcat-core.tar> | ||||
|     rm /install/post/otherpkgs/<os>/<arch>/xcat/<xcat-dep.tar> | ||||
|  | ||||
| #. Download the desired tar files from xcat.org on to the Management Node, and untar them in the same NFS mounted ``/install/post/otherpkgs/`` directory: :: | ||||
|   | ||||
|  | ||||
|     cd /install/post/otherpkgs/<os>/<arch>/xcat/ | ||||
|     tar jxvf <new-xcat-core.tar> | ||||
|     tar jxvf <new-xcat-dep.tar> | ||||
|   | ||||
| @@ -67,7 +67,7 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage | ||||
|  | ||||
|   If you would like to change any of these files, copy them to a custom | ||||
|   directory. This can be any directory you choose, but we recommend that you | ||||
|   keep it /install somewhere. A good location is something like ``/install/custom/netboot/<osimage>``.  | ||||
|   keep it somewhere under /install. A good location is something like ``/install/custom/netboot/<osimage>``. | ||||
|  | ||||
|   :: | ||||
|  | ||||
| @@ -172,8 +172,8 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage | ||||
|   If you installed your management node directly from the online | ||||
|   repository, you will need to download the ``xcat-core`` and ``xcat-dep`` tarballs | ||||
|  | ||||
|   - From http://xcat.org/download.html, download the ``xcat-core`` and ``xcat-dep`` tarball files.   | ||||
|     Copy these into a subdirectory in the ``otherpkgdir`` directory.  | ||||
|   - From http://xcat.org/download.html, download the ``xcat-core`` and ``xcat-dep`` tarball files. | ||||
|     Copy these into a subdirectory in the ``otherpkgdir`` directory. | ||||
|  | ||||
|     :: | ||||
|  | ||||
| @@ -184,9 +184,9 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage | ||||
|       cd /install/post/otherpkgs/rhels7.3/ppc64le | ||||
|       mkdir xcat | ||||
|       cd xcat | ||||
|        | ||||
|  | ||||
|       # copy the <xcat-core> and <xcat-dep> tarballs here | ||||
|        | ||||
|  | ||||
|       # extract the tarballs | ||||
|       tar -jxvf <xcat-core>.tar.bz2 | ||||
|       tar -jxvf <xcat-dep>.tar.bz2 | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| Service Nodes | ||||
| ============= | ||||
|  | ||||
| Service Nodes are similar to the xCAT Management Node in that each service Nodes runs an instance of the xCAT daemon: ``xcatd``.  ``xcatd``'s communicate with each other using the same XML/SSL protocol that the xCAT client uses to communicate with ``xcatd`` on the Management Node.  | ||||
| Service Nodes are similar to the xCAT Management Node in that each Service Node runs an instance of the xCAT daemon: ``xcatd``.  The ``xcatd`` daemons communicate with each other using the same XML/SSL protocol that the xCAT client uses to communicate with ``xcatd`` on the Management Node. | ||||
|  | ||||
| The Service Nodes need to communicate with the xCAT database running on the Management Node.  This is done using the remote client capabilities of the database.  This is why the default SQLite database cannot be used. | ||||
|  | ||||
|   | ||||
| @@ -6,15 +6,15 @@ Use the :doc:`buildkit </guides/admin-guides/references/man1/buildkit.1>` comman | ||||
|     buildkit create <kitbasename> [-l|--kitloc <kit location>] | ||||
|  | ||||
|  | ||||
| Kit Directory  | ||||
| Kit Directory | ||||
| ------------- | ||||
|  | ||||
| The Kit directory location will be automatically  populated with additional subdirecotries and samples:  | ||||
| The Kit directory location will be automatically populated with additional subdirectories and samples: | ||||
|  | ||||
| **buildkit.conf** -  The sample Kit build configuration file. | ||||
|  | ||||
| **source_packages** - This directory stores the source packages for Kit Packages and Non-Native Packages.  The **buildkit** command will search these directories for source packages when building packages.  This directory stores: | ||||
|    | ||||
|  | ||||
|   * RPM spec and tarballs. (A sample spec file is provided.) | ||||
|   * Source RPMs. | ||||
|   * Pre-built RPMs (contained in a subdirectory of source_packages) | ||||
| @@ -42,7 +42,7 @@ The Kit directory location will be automatically  populated with additional subd | ||||
| Kit Configuration File | ||||
| ---------------------- | ||||
|  | ||||
| The ``buildkit.conf`` file is a sample file that contains a description of all the supported attributes and indicates required or optional fields.  The user needs to modify this file for the software kit to be built. [#]_  | ||||
| The ``buildkit.conf`` file is a sample file that contains a description of all the supported attributes and indicates required or optional fields.  The user needs to modify this file for the software kit to be built. [#]_ | ||||
|  | ||||
| **kit** --- This stanza defines general information for the Kit.  There must be exactly one kit stanza in a kit build file.  :: | ||||
|  | ||||
| @@ -71,12 +71,12 @@ The ``buildkit.conf`` file is a sample file that contains a description of all t | ||||
|       osarch=x86_64 | ||||
|  | ||||
| The minor version supports the following formats: :: | ||||
|      | ||||
|  | ||||
|     osminorversion=2  <<-- minor version has to be exactly matched to 2 | ||||
|     osminorversion=>=2  <<-- minor version can be 2 or greater than 2 | ||||
|     osminorversion=<=2  <<-- minor version can be 2 or less than 2  | ||||
|     osminorversion=<=2  <<-- minor version can be 2 or less than 2 | ||||
|     osminorversion=>2  <<-- minor version has to be greater than 2 | ||||
|     osminorversion=<2  <<-- minor version has to be less than 2  | ||||
|     osminorversion=<2  <<-- minor version has to be less than 2 | ||||
|  | ||||
| **kitcomponent** --- This stanza defines one Kit Component. A kitcomponent definition is a way of specifying a subset of the product Kit that may be installed into an xCAT osimage.  A kitcomponent may or may not be dependent on other kitcomponents. If the user wants to build a component which supports multiple OSes, they need to create one kitcomponent stanza for each OS.  :: | ||||
|  | ||||
| @@ -106,13 +106,13 @@ minor version can be support following format: :: | ||||
|        serverroles=compute | ||||
|        ospkgdeps=at,rsh-server,xinetd,sudo,libibverbs-32bit,libibverbs,insserv | ||||
|        kitrepoid=sles11_x86_64 | ||||
|        kitpkgdeps=ppe_rte_license   | ||||
|        kitpkgdeps=ppe_rte_license | ||||
|  | ||||
|  | ||||
| **kitpackage** --- This stanza defines Kit Package (ie. RPM). There can be zero or more kitpackage stanzas.  For multiple package supports,  need to  | ||||
| **kitpackage** --- This stanza defines a Kit Package (i.e. RPM). There can be zero or more kitpackage stanzas.  For multiple package support, you need to either: | ||||
|  | ||||
|   #. Define one kitpackage section per supported OS.  or | ||||
|   #. Define one kitpacakge stanza which contains multiple kitrepoid lines. For the RPM packages, users need to responsible for creating an RPM spec file that can run on multiple OSes.   | ||||
|   #. Define one kitpackage stanza which contains multiple kitrepoid lines. For the RPM packages, users are responsible for creating an RPM spec file that can run on multiple OSes. | ||||
|  | ||||
| :: | ||||
|  | ||||
| @@ -149,7 +149,7 @@ minor version can be support following format: :: | ||||
| Partial vs. Complete Kits | ||||
| ------------------------- | ||||
|  | ||||
| A **complete** software kits includes all the product software and is ready to be consumed as is.   A **partial** software kit is one that does not include all the product packages and requires the consumer to download the product software and complete the kit before it can be consumed.   | ||||
| A **complete** software kit includes all the product software and is ready to be consumed as is.   A **partial** software kit is one that does not include all the product packages and requires the consumer to download the product software and complete the kit before it can be consumed. | ||||
|  | ||||
| To build partial kits, the ``isexternalpkg=yes`` needs to be set in the ``kitpackage`` stanza in the ``buildkit.conf`` file: :: | ||||
|  | ||||
|   | ||||
| @@ -25,4 +25,4 @@ If the Kit Package Repository is not fully built, the command builds it as follo | ||||
|     #. Build the Component Meta-Packages associated with this Kit Package Repository. Create the packages under the Kit Package Repository directory | ||||
|     #. Build the Kit Packages associated with this Kit Package Repository. Create the packages under the Kit Package Repository directory | ||||
|     #. Build the repository meta-data for the Kit Package Repository. The repository meta-data is based on the OS native package format. For example, for RHEL, we build the YUM repository meta-data with the createrepo command. | ||||
|   | ||||
|  | ||||
|   | ||||
| @@ -37,7 +37,7 @@ Follow these steps to complete the kit build process for a partial kit. | ||||
|   #. copy the partial kit to a working directory | ||||
|   #. copy the product software packages to a convenient location or locations | ||||
|   #. cd to the working directory | ||||
|   #. Build the complete kit tarfile  | ||||
|   #. Build the complete kit tarfile | ||||
|  | ||||
| :: | ||||
|  | ||||
|   | ||||
| @@ -5,5 +5,5 @@ After modifying the ``buildkit.conf`` file and copying all the necessary files t | ||||
|  | ||||
|     buildkit chkconfig | ||||
|  | ||||
| This command will verify all required fields defined in the buildkit.conf.  If errors are found, fix the specified error and rerun the command until all fields are validated.  | ||||
| This command will verify all required fields defined in the buildkit.conf.  If errors are found, fix the specified error and rerun the command until all fields are validated. | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| Introduction  | ||||
| Introduction | ||||
| ============ | ||||
|  | ||||
| Contents | ||||
| @@ -6,10 +6,10 @@ Contents | ||||
|  | ||||
| A Software Kit is a tar file that contains the following: | ||||
|  | ||||
| **Kit Configuration File** --- A file describing the contents of this kit and contains following information  | ||||
| **Kit Configuration File** --- A file describing the contents of this kit and contains following information | ||||
|  | ||||
|   * Kit name, version, description, supported OS distributions, license information, and deployment parameters | ||||
|   * Kit repository information including name, supported OS distributions, and supported architectures  | ||||
|   * Kit repository information including name, supported OS distributions, and supported architectures | ||||
|   * Kit component information including name, version, description, server roles, scripts, and other data | ||||
|  | ||||
| **Kit Repositories** --- A directory for each operating system version this kit is supported in. Each directory contains all of the product software packages required for that environment along with repository metadata. | ||||
| @@ -42,14 +42,14 @@ Once the kit components are added to xCAT osimage definitions, administrators ca | ||||
| #. ``genimage`` command to create a diskless OS image installing the kit components for diskless OS provisioning | ||||
| #. ``updatenode`` command to install the kit components on existing deployed nodes | ||||
|  | ||||
| The ``kitcomponent`` metadata defines the kit packages as dependency packages and the OS package manager (``yum``, ``zypper``, ``apt-get``) automatically installs the required packages during the xCAT ``otherpkgs`` install process.  | ||||
| The ``kitcomponent`` metadata defines the kit packages as dependency packages and the OS package manager (``yum``, ``zypper``, ``apt-get``) automatically installs the required packages during the xCAT ``otherpkgs`` install process. | ||||
|  | ||||
| Kit Framework | ||||
| ------------- | ||||
|  | ||||
| With time, the implementation of the xCAT Software Kit support may change.   | ||||
| With time, the implementation of the xCAT Software Kit support may change. | ||||
|  | ||||
| In order to process a kit successfully, the kit must be compatible with the level of xCAT code that was used to build the kit.  The xCAT kit commands and software kits contain the framework version and compatible supported versions.  | ||||
| In order to process a kit successfully, the kit must be compatible with the level of xCAT code that was used to build the kit.  The xCAT kit commands and software kits contain the framework version and compatible supported versions. | ||||
|  | ||||
| To view the framework version, use the ``-v | --version`` option on :doc:`addkit </guides/admin-guides/references/man1/addkit.1>`  :: | ||||
|  | ||||
| @@ -59,7 +59,7 @@ To view the framework version, use the ``-v | --version`` option on :doc:`addkit | ||||
|             compatible_frameworks = 0,1,2 | ||||
|  | ||||
|  | ||||
| If the commands in the xCAT installation are not compatible with the Software Kit obtained, update xCAT to a more recent release.  | ||||
| If the commands in the xCAT installation are not compatible with the Software Kit obtained, update xCAT to a more recent release. | ||||
|  | ||||
|  | ||||
| .. [#] PCM is IBM Platform Cluster Manager  | ||||
| .. [#] PCM is IBM Platform Cluster Manager | ||||
|   | ||||
| @@ -24,6 +24,6 @@ updating diskful nodes | ||||
|  | ||||
| For existing active nodes, use the updatenode command to update the OS on those nodes. The updatenode command will use the osimage assigned to the node to determine the software to be updated. Once the osimage has been updated, make sure the correct image is assigned to the node and then run updatenode: :: | ||||
|  | ||||
|   chdef <nodelist> provmethod=<osimage>       | ||||
|   chdef <nodelist> provmethod=<osimage> | ||||
|   updatenode <nodelist> | ||||
|  | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| Quick Start Guide | ||||
| ================= | ||||
|  | ||||
| This quick start is provided to guide users through the steps required to install the IBM High Performance Computing (HPC) software stack on a cluster managed by xCAT. (*NOTE:* xCAT provides XLC and XLF partial kits, but all other HPC kits are provided by the HPC products teams, so xCAT may not have any knowledge of their dependencies and requirements)   | ||||
| This quick start is provided to guide users through the steps required to install the IBM High Performance Computing (HPC) software stack on a cluster managed by xCAT. (*NOTE:* xCAT provides XLC and XLF partial kits, but all other HPC kits are provided by the HPC products teams, so xCAT may not have any knowledge of their dependencies and requirements) | ||||
|  | ||||
| The following software kits will be used to install the IBM HPC software stack on to a RedHat Enterprise Linux 7.2 operating system running on ppc64le architecture.  | ||||
| The following software kits will be used to install the IBM HPC software stack on to a RedHat Enterprise Linux 7.2 operating system running on ppc64le architecture. | ||||
|  | ||||
|     * ``xlc-13.1.3-0-ppc64le.tar.bz2`` [1]_ | ||||
|     * ``xlf-15.1.3-0-ppc64le.tar.bz2`` [1]_ | ||||
| @@ -16,7 +16,7 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
| .. [1] This guide assumes that the **complete** software kit is available for all the products listed below. For the IBM XL compilers, follow the :doc:`IBM XL Compiler </advanced/kit/hpc/software/compilers>` documentation to obtain the software and create the **complete** kit before proceeding. | ||||
|  | ||||
| 1. Using the ``addkit`` command, add each software kit package into xCAT: :: | ||||
|    | ||||
|  | ||||
|     addkit xlc-13.1.3-0-ppc64le.tar.bz2,xlf-15.1.3-0-ppc64le.tar.bz2 | ||||
|     addkit pperte-2.3.0.0-1547a-ppc64le.tar.bz2,pperte-2.3.0.2-s002a-ppc64le.tar.bz2 | ||||
|     addkit pessl-5.2.0-0-ppc64le.tar.bz2,essl-5.4.0-0-ppc64le.tar.bz2 | ||||
| @@ -25,15 +25,15 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|    The ``lskit`` command can be used to view the kits after adding to xCAT. | ||||
|  | ||||
|  | ||||
| 2. Using the ``addkitcomp`` command, add the kitcomponent to the target osimage.   | ||||
| 2. Using the ``addkitcomp`` command, add the kitcomponent to the target osimage. | ||||
|  | ||||
|    The order that the kit components are added to the osimage is important due to dependencies that kits may have with one another, a feature to help catch potential issues ahead of time.  There are a few different types of dependencies:  | ||||
|    The order that the kit components are added to the osimage is important due to dependencies that kits may have with one another, a feature to help catch potential issues ahead of time.  There are a few different types of dependencies: | ||||
|  | ||||
|       * **internal kit dependencies** - kit components within the software kit have dependencies.  For example, the software has a dependency on its license component.  The ``-a`` option will automatically resolve internal kit dependencies. | ||||
|       * **external kit dependencies** - a software kit depends on another software provided in a separate kit.  The dependency kit must be added first.  ``addkitcomp`` will complain if it cannot resolve the dependency.  | ||||
|       * **runtime dependencies** - the software provided in the kit has rpm requirements for external 3rd party RPMs not shipped with the kit.  The administrator needs to configure these before deploying the osimage and ``addkitcomp`` cannot detect these dependencies.  | ||||
|       * **external kit dependencies** - a software kit depends on another software provided in a separate kit.  The dependency kit must be added first.  ``addkitcomp`` will complain if it cannot resolve the dependency. | ||||
|       * **runtime dependencies** - the software provided in the kit has rpm requirements for external 3rd party RPMs not shipped with the kit.  The administrator needs to configure these before deploying the osimage and ``addkitcomp`` cannot detect these dependencies. | ||||
|  | ||||
|   In the following examples, the ``rhels7.2-ppc64le-install-compute`` osimage is used and the ``-a`` option is specified to resolve internal dependencies.  | ||||
|   In the following examples, the ``rhels7.2-ppc64le-install-compute`` osimage is used and the ``-a`` option is specified to resolve internal dependencies. | ||||
|  | ||||
|     #. Add the **XLC** kitcomponents to the osimage:  :: | ||||
|  | ||||
| @@ -42,7 +42,7 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|  | ||||
|  | ||||
|     #. Add the **XLF** kitcomponents to the osimage:  :: | ||||
|    | ||||
|  | ||||
|         addkitcomp -a -i rhels7.2-ppc64le-install-compute \ | ||||
|             xlf.compiler-compute-15.1.3-0-rhels-7.2-ppc64le | ||||
|  | ||||
| @@ -59,11 +59,11 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|             min-pperte-compute-2.3.0.0-1547a-rhels-7.2-ppc64le | ||||
|  | ||||
|  | ||||
|     #. Add the PE RTE PTF2, **pperte-s002a**, kitcomponents to the osimage.  | ||||
|     #. Add the PE RTE PTF2, **pperte-s002a**, kitcomponents to the osimage. | ||||
|  | ||||
|        The PTF2 update requires the ``pperte-license`` component, which is provided by the GA software kit.  The ``addkitcomp -n`` option allows for multiple versions of the same kit component to be installed into the osimage.  If only the PTF2 version is intended to be installed, you can skip the previous step for adding the GA pperte kit component, but the GA software kit must have been added to xCAT with the ``addkit`` command in order to resolve the license dependency.  :: | ||||
|  | ||||
|         addkitcomp -a -n -i rhels7.2-ppc64le-install-compute \  | ||||
|         addkitcomp -a -n -i rhels7.2-ppc64le-install-compute \ | ||||
|             pperte-login-2.3.0.2-s002a-rhels-7.2-ppc64le | ||||
|  | ||||
|         addkitcomp -a -n -i rhels7.2-ppc64le-install-compute \ | ||||
| @@ -73,7 +73,7 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|             min-pperte-compute-2.3.0.2-s002a-rhels-7.2-ppc64le | ||||
|  | ||||
|  | ||||
|     #. Add the **ESSL** kitcomponents to the osimage.   | ||||
|     #. Add the **ESSL** kitcomponents to the osimage. | ||||
|  | ||||
|        The ESSL software kit has an *external dependency* to the ``libxlf`` which is provided in the XLF software kit.  Since it's already added in the above step, there is no action needed here. | ||||
|  | ||||
| @@ -101,9 +101,9 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|         addkitcomp -a -i rhels7.2-ppc64le-install-compute \ | ||||
|             essl-computenode-3264rtecuda-5.4.0-0-rhels-7.2-ppc64le | ||||
|  | ||||
|       If the system doesn't have a GPU and the CUDA toolkit is not needed,  the administrator should not add the following kit components that require the CUDA packages: ``essl-loginnode-5.4.0-0-rhels-7.2-ppc64le``, ``essl-computenode-3264rte-5.4.0-0-rhels-7.2-ppc64le`` and ``essl-computenode-3264rtecuda-5.4.0-0-rhels-7.2-ppc64le``.  Check the ESSL installation guide: http://www.ibm.com/support/knowledgecenter/SSFHY8_5.4.0/com.ibm.cluster.essl.v5r4.essl300.doc/am5il_xcatinstall.htm  | ||||
|       If the system doesn't have a GPU and the CUDA toolkit is not needed,  the administrator should not add the following kit components that require the CUDA packages: ``essl-loginnode-5.4.0-0-rhels-7.2-ppc64le``, ``essl-computenode-3264rte-5.4.0-0-rhels-7.2-ppc64le`` and ``essl-computenode-3264rtecuda-5.4.0-0-rhels-7.2-ppc64le``.  Check the ESSL installation guide: http://www.ibm.com/support/knowledgecenter/SSFHY8_5.4.0/com.ibm.cluster.essl.v5r4.essl300.doc/am5il_xcatinstall.htm | ||||
|  | ||||
|     #. Add the **Parallel ESSL** kitcomponents to osimage.   | ||||
|     #. Add the **Parallel ESSL** kitcomponents to osimage. | ||||
|  | ||||
|        **Note:** ESSL kitcomponents are required for the PESSL.  :: | ||||
|  | ||||
| @@ -115,7 +115,7 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|  | ||||
|         addkitcomp -a -i rhels7.2-ppc64le-install-compute \ | ||||
|             pessl-computenode-3264rtempich-5.2.0-0-rhels-7.2-ppc64le | ||||
|   | ||||
|  | ||||
|  | ||||
|     #. Add the **PE DE** kitcomponents to osimage:  :: | ||||
|  | ||||
| @@ -124,11 +124,11 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|  | ||||
|         addkitcomp -a -i rhels7.2-ppc64le-install-compute \ | ||||
|             ppedev.compute-2.2.0-0-rhels-7.2-ppc64le | ||||
|      | ||||
|  | ||||
|  | ||||
| 3. The updated osimage now contains the configuration to install using xCAT software kits: :: | ||||
|  | ||||
|      lsdef -t osimage rhels7.2-ppc64le-install-compute  | ||||
|      lsdef -t osimage rhels7.2-ppc64le-install-compute | ||||
|         Object name: rhels7.2-ppc64le-install-compute | ||||
|         exlist=/install/osimages/rhels7.2-ppc64le-install-compute-kits/kits/KIT_COMPONENTS.exlist | ||||
|         imagetype=linux | ||||
| @@ -146,4 +146,4 @@ The following software kits will be used to install the IBM HPC software stack o | ||||
|         provmethod=install | ||||
|         template=/opt/xcat/share/xcat/install/rh/compute.rhels7.tmpl | ||||
|  | ||||
| 4. The osimage is now ready to deploy to the compute nodes.  | ||||
| 4. The osimage is now ready to deploy to the compute nodes. | ||||
|   | ||||
| @@ -1,13 +1,13 @@ | ||||
| IBM XL Compilers | ||||
| ================ | ||||
|  | ||||
| IBM provides XL compilers with advanced optimizing on IBM Power Systems running Linux.  | ||||
| IBM provides XL compilers with advanced optimizing on IBM Power Systems running Linux. | ||||
| For more information, http://www-03.ibm.com/software/products/en/xlcpp-linux | ||||
|  | ||||
| Partial Kits | ||||
| ------------ | ||||
|  | ||||
| The IBM XL compilers are dependencies for some of the HPC software products and are **not** available in xCAT Software Kit format.   | ||||
| The IBM XL compilers are dependencies for some of the HPC software products and are **not** available in xCAT Software Kit format. | ||||
|  | ||||
| To assist customers in creating a software kit for the IBM XL compilers, xCAT provides partial kits at: https://xcat.org/files/kits/hpckits/ | ||||
|  | ||||
| @@ -46,7 +46,7 @@ To use software kits that require compiler kit components, a compiler software k | ||||
|         xlsmp.lib-3.1.0.8-151013.ppc64.rpm | ||||
|         xlsmp.msg.rte-3.1.0.8-151013.ppc64.rpm | ||||
|         xlsmp.rte-3.1.0.8-151013.ppc64.rpm | ||||
|    | ||||
|  | ||||
| #. Obtain the corresponding compiler partial kit from https://xcat.org/files/kits/hpckits/. [#]_ | ||||
|  | ||||
|    **xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2** is downloaded to ``/tmp/kits``: :: | ||||
| @@ -56,11 +56,11 @@ To use software kits that require compiler kit components, a compiler software k | ||||
|  | ||||
| #. Complete the partial kit by running the ``buildkit addpkgs`` command: :: | ||||
|  | ||||
|        buildkit addpkgs xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2 \  | ||||
|        buildkit addpkgs xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2 \ | ||||
|           --pkgdir /tmp/kits/xlc-12.1.0.8 | ||||
|  | ||||
|    Sample output: :: | ||||
|   | ||||
|  | ||||
|        Extracting tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2. Please wait. | ||||
|        Spawning worker 0 with 5 pkgs | ||||
|        Spawning worker 1 with 5 pkgs | ||||
| @@ -73,7 +73,7 @@ To use software kits that require compiler kit components, a compiler software k | ||||
|        Generating sqlite DBs | ||||
|        Sqlite DBs complete | ||||
|        Creating tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.tar.bz2. | ||||
|        Kit tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.tar.bz2 successfully built.  | ||||
|        Kit tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.tar.bz2 successfully built. | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -10,4 +10,4 @@ Refer to the following pages for product specific details and known issues. | ||||
|    pe_rte.rst | ||||
|    pe_de.rst | ||||
|    essl.rst | ||||
|    pessl.rst  | ||||
|    pessl.rst | ||||
|   | ||||
| @@ -3,7 +3,7 @@ Parallel Environment Developer Edition (PE DE) | ||||
|  | ||||
| xCAT software kits for PE DE for Linux are available for: [#]_ | ||||
|  | ||||
|    * PE DE 1.2.0.1 and newer (SystemX)  | ||||
|    * PE DE 1.2.0.1 and newer (SystemX) | ||||
|    * PE DE 1.2.0.3 and newer (SystemP) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ xCAT software kits for PE RTE for Linux is available on: [#]_ | ||||
|  | ||||
|  | ||||
|  | ||||
| PE RTE and ``mlnxofed_ib_install`` Conflict  | ||||
| PE RTE and ``mlnxofed_ib_install`` Conflict | ||||
| ------------------------------------------- | ||||
|  | ||||
| PPE requires the 32-bit version of ``libibverbs``.  The default behavior of the ``mlnxofed_ib_install`` postscript used to install the Mellanox OFED Infiniband (IB) driver is to remove any of the old IB related packages when installing.  To bypass this behavior, set the variable ``mlnxofed_options=--force`` when running the ``mlnxofed_ib_install`` script. | ||||
| @@ -18,8 +18,8 @@ Install Multiple Versions | ||||
|  | ||||
| Beginning with **PE RTE 1.2.0.10**, the packages are designed to allow for multiple versions of PE RTE to coexist on the same machine. | ||||
|  | ||||
| The default behavior of xCAT software kits is to only allow one version of a ``kitcomponent`` to be associated with an xCAT osimage.   | ||||
| When using ``addkitcomp`` to add a newer version of a kit component, xCAT will first remove the old version of the kit component before adding the new one.   | ||||
| The default behavior of xCAT software kits is to only allow one version of a ``kitcomponent`` to be associated with an xCAT osimage. | ||||
| When using ``addkitcomp`` to add a newer version of a kit component, xCAT will first remove the old version of the kit component before adding the new one. | ||||
|  | ||||
| To add multiple versions of PE RTE kit components to the same osimage, use the ``-n | --noupgrade`` option.  For example, to add PE RTE 1.3.0.1 and PE RTE 1.3.0.2 to the ``compute`` osimage: :: | ||||
|  | ||||
| @@ -36,11 +36,11 @@ When running parallel jobs, POE requires the user pass it a host list file.  xCA | ||||
| Known Issues | ||||
| ------------ | ||||
|  | ||||
| * **[PE RTE 1.3.0.7]** - For developers creating the complete software kit.  The src rpm is no longer required.   It is recommended to create the new software kit for PE RTE 1.3.0.7 from scratch and not to use the older kits as a starting point.  | ||||
| * **[PE RTE 1.3.0.7]** - For developers creating the complete software kit.  The src rpm is no longer required.   It is recommended to create the new software kit for PE RTE 1.3.0.7 from scratch and not to use the older kits as a starting point. | ||||
|  | ||||
| * **[PE RTE 1.3.0.7]** - When upgrading ``ppe_rte_man`` in a diskless image, there may be errors reported during the genimage process.  The new packages are actually upgraded, so the errors can be ignored with low risk.  | ||||
| * **[PE RTE 1.3.0.7]** - When upgrading ``ppe_rte_man`` in a diskless image, there may be errors reported during the genimage process.  The new packages are actually upgraded, so the errors can be ignored with low risk. | ||||
|  | ||||
| * **[PE RTE 1.3.0.1 to 1.3.0.6]** - When uninstalling or upgrading ppe_rte_man in a diskless image, ``genimage <osimage>`` may fail and stop with an error.  To work around this, simply rerun ``genimage <osimage>`` to finish the creation of the diskless image  | ||||
| * **[PE RTE 1.3.0.1 to 1.3.0.6]** - When uninstalling or upgrading ppe_rte_man in a diskless image, ``genimage <osimage>`` may fail and stop with an error.  To work around this, simply rerun ``genimage <osimage>`` to finish the creation of the diskless image | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| Software Kits | ||||
| ============= | ||||
|  | ||||
| xCAT supports a unique software bundling concept called **software kits**.  Software kit combines all of the required product components (packages, license, configuration, scripts, etc) to assist the administrator in the installation of software onto machines managed by xCAT.  Software kits are made up of a collection of "kit components", each of which is tailored to one specific environment for that particular version of the software product.  | ||||
| xCAT supports a unique software bundling concept called **software kits**.  Software kit combines all of the required product components (packages, license, configuration, scripts, etc) to assist the administrator in the installation of software onto machines managed by xCAT.  Software kits are made up of a collection of "kit components", each of which is tailored to one specific environment for that particular version of the software product. | ||||
|  | ||||
| Prebuilt software kits are available as a tar file which can be downloaded and then added to the xCAT installation.  After the kits are added to xCAT, kit components are then added to specific xCAT osimages to automatically install the software bundled with the kit during OS deployment.  In some instances, software kits may be provided as partial kits.  Partial kits need additional effort to complete the kit before it can be used by xCAT.  | ||||
| Prebuilt software kits are available as a tar file which can be downloaded and then added to the xCAT installation.  After the kits are added to xCAT, kit components are then added to specific xCAT osimages to automatically install the software bundled with the kit during OS deployment.  In some instances, software kits may be provided as partial kits.  Partial kits need additional effort to complete the kit before it can be used by xCAT. | ||||
|  | ||||
| Software kits are supported for both diskful and diskless image provisioning. | ||||
|  | ||||
|   | ||||
| @@ -12,24 +12,24 @@ Backup Old xCAT Management Node | ||||
|  | ||||
| Backup xCAT management node data to backup server: | ||||
|  | ||||
| 1.1 Backup xCAT important files and directories:  | ||||
| 1.1 Backup xCAT important files and directories: | ||||
|  | ||||
|     #. Get ``installdir`` from ``site`` table, backup ``installdir`` directory,  | ||||
|     #. Get ``installdir`` from ``site`` table, backup ``installdir`` directory, | ||||
|        in this case, back up ``install`` directory: :: | ||||
|         | ||||
|  | ||||
|         lsdef -t site  clustersite -i installdir | ||||
|             Object name: clustersite | ||||
|             installdir=/install | ||||
|      | ||||
|     #. Backup these two xCAT directories: ::  | ||||
|  | ||||
|     #. Backup these two xCAT directories: :: | ||||
|  | ||||
|         ~/.xcat | ||||
|         /etc/xcat | ||||
|  | ||||
|        .. note:: Backing up ``~/.xcat`` is for all users who have xCAT client certs.  | ||||
|        .. note:: Backing up ``~/.xcat`` is for all users who have xCAT client certs. | ||||
|  | ||||
|     #. If there are customized files and directories for ``otherpkgdir``, ``pkgdir``, ``pkglist`` or ``template`` in some `osimage` definitions, backup these files and directories. For example: :: | ||||
|          | ||||
|  | ||||
|         lsdef -t osimage customized_rhels7.4-x86_64-install-compute -i otherpkgdir,pkgdir,pkglist,template | ||||
|             Object name: customized_rhels7.4-x86_64-install-compute | ||||
|                 otherpkgdir=/<customized_dir>/post/otherpkgs/rhels7.4/x86_64 | ||||
| @@ -87,10 +87,10 @@ Backup xCAT management node data to backup server: | ||||
|  | ||||
|     /etc/ntp.conf | ||||
|  | ||||
| 1.11 Backup database configure files (optional):  | ||||
| 1.11 Backup database configure files (optional): | ||||
|  | ||||
|     * **[PostgreSQL]** :: | ||||
|        | ||||
|  | ||||
|       /var/lib/pgsql/data/pg_hba.conf | ||||
|       /var/lib/pgsql/data/postgresql.conf | ||||
|  | ||||
| @@ -140,17 +140,17 @@ Restore xCAT management node | ||||
| 2.1 Power off old xCAT management server before configuring new xCAT management server | ||||
|  | ||||
| 2.2 Configure new xCAT management server using the same ip and hostname as old xCAT management server. Configure the same additional network for hardware management network if needed, for example, bmc network or hmc network. xCAT management server setup refer to :doc:`Prepare the Management Node <../../guides/install-guides/yum/prepare_mgmt_node>` | ||||
|      | ||||
|  | ||||
| 2.3 Overwrite files/directories mentioned in above 1.2, 1.3, 1.4 from backup server to new xCAT management server | ||||
|  | ||||
| 2.4 Download xcat-core and xcat-dep tar ball, then install xCAT in new xCAT management server, refer to :doc:`install xCAT <../../guides/install-guides/yum/install>` | ||||
|  | ||||
| 2.5 Use ``rpm -qa|grep -i xCAT`` to list all xCAT RPMs in new xCAT management node, compare these RPMs base name with those in ``xcat_rpm_names`` from above 1.15. If some RPMs are missing, use ``yum install <rpm_package_basename>`` to install missing RPMs.  | ||||
| 2.5 Use ``rpm -qa|grep -i xCAT`` to list all xCAT RPMs in new xCAT management node, compare these RPMs base name with those in ``xcat_rpm_names`` from above 1.15. If some RPMs are missing, use ``yum install <rpm_package_basename>`` to install missing RPMs. | ||||
|  | ||||
| 2.6 If using ``MySQL``/``MariaDB``/``PostgreSQL``, migrate xCAT to use ``MySQL/MariaDB/PostgreSQL``; refer to :doc:`Configure a Database <../hierarchy/databases/index>` | ||||
|  | ||||
| 2.7 To restore the xCAT database | ||||
|    | ||||
|  | ||||
|     a. Restore xCAT database from the ``/dbbackup/db`` directory without ``auditlog`` and ``eventlog``, enter: :: | ||||
|  | ||||
|         restorexCATdb -p /dbbackup/db | ||||
| @@ -160,7 +160,7 @@ Restore xCAT management node | ||||
|         restorexCATdb -a -p /dbbackup/db | ||||
|  | ||||
|     c. (optional) Overwrite files in above 1.11, restart ``PostgreSQL``: :: | ||||
|       | ||||
|  | ||||
|         service postgresql restart | ||||
|  | ||||
| 2.8 Overwrite remaining files/directories mentioned in above 1.1, 1.5, 1.6, 1.7, 1.8, 1.9, 1.10, 1.12; If needed, check if files exist based on above 1.13 and 1.16. | ||||
|   | ||||
| @@ -3,11 +3,11 @@ Building Stateless/Diskless Images | ||||
|  | ||||
| A **stateless**, or **diskless**, provisioned node is one where the operating system image is deployed and loaded into memory.  The Operating System (OS) does not store its files directly onto persistent storage (i.e. hard disk drive, shared drive, usb, etc) and so subsequent rebooting of the machine results in loss of any state changes that happened while the machine was running. | ||||
|  | ||||
| To deploy stateless compute nodes, you must first create a stateless image.  The "netboot" osimages created from ``copycds`` in the **osimage** table are sample osimage definitions that can be used for deploying stateless nodes.  | ||||
| To deploy stateless compute nodes, you must first create a stateless image.  The "netboot" osimages created from ``copycds`` in the **osimage** table are sample osimage definitions that can be used for deploying stateless nodes. | ||||
|  | ||||
| In a homogeneous cluster, the management node is the same hardware architecture and running the same Operating System (OS) as the compute nodes, so ``genimage`` can directly be executed from the management node.  | ||||
| In a homogeneous cluster, the management node is the same hardware architecture and running the same Operating System (OS) as the compute nodes, so ``genimage`` can directly be executed from the management node. | ||||
|  | ||||
| The issue arises in a heterogeneous cluster, where the management node is running a different level operating system *or* hardware architecture than the compute nodes in which to deploy the image.  The ``genimage`` command that builds stateless images depends on various utilities provided by the base operating system and needs to be run on a node with the same hardware architecture and *major* Operating System release as the nodes that will be booted from the image.  | ||||
| The issue arises in a heterogeneous cluster, where the management node is running a different level operating system *or* hardware architecture than the compute nodes in which to deploy the image.  The ``genimage`` command that builds stateless images depends on various utilities provided by the base operating system and needs to be run on a node with the same hardware architecture and *major* Operating System release as the nodes that will be booted from the image. | ||||
|  | ||||
| Same Operating System, Different Architecture | ||||
| --------------------------------------------- | ||||
| @@ -19,9 +19,9 @@ The following describes creating stateless images of the same Operating System, | ||||
|  | ||||
| #. On xCAT management node, ``xcatmn``, select the osimage you want to create from the list of osimage definitions.  To list out the osimage definitions: :: | ||||
|  | ||||
|         lsdef -t osimage  | ||||
|         lsdef -t osimage | ||||
|  | ||||
| #. **optional:** Create a copy of the osimage definition that you want to modify.   | ||||
| #. **optional:** Create a copy of the osimage definition that you want to modify. | ||||
|  | ||||
|    To take the sample ``rhels6.3-x86_64-netboot-compute`` osimage definition and create a copy called ``mycomputeimage``, run the following command: :: | ||||
|  | ||||
| @@ -38,15 +38,15 @@ The following describes creating stateless images of the same Operating System, | ||||
|         ./genimage -a x86_64 -o rhels6.3 -p compute --permission 755 --srcdir /install/rhels6.3/x86_64 --pkglist \ | ||||
|         /opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.pkglist --otherpkgdir /install/post/otherpkgs/rhels6.3/x86_64 --postinstall \ | ||||
|         /opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall --rootimgdir /install/netboot/rhels6.3/x86_64/compute mycomputeimage | ||||
|   | ||||
|            | ||||
|  | ||||
|  | ||||
| #. Go to the target node, ``n01`` and run the following: | ||||
|  | ||||
|    #. mount the ``/install`` directory from the xCAT Management Node: :: | ||||
|          | ||||
|  | ||||
|        mkdir /install | ||||
|        mount -o soft xcatmn:/install /install | ||||
|          | ||||
|  | ||||
|    #. Copy the executable files from the ``/opt/xcat/share/xcat/netboot`` from the xCAT Management node to the target node: :: | ||||
|  | ||||
|        mkdir -p /opt/xcat/share/xcat/ | ||||
| @@ -61,7 +61,7 @@ The following describes creating stateless images of the same Operating System, | ||||
|  | ||||
|  | ||||
|    **If problems creating the stateless image, provide a local directory for --rootimgdir:** :: | ||||
|    | ||||
|  | ||||
|         mkdir -p /tmp/compute | ||||
|  | ||||
|    Rerun ``genimage``, replacing ``--rootimgdir /tmp/compute``: :: | ||||
| @@ -70,8 +70,8 @@ The following describes creating stateless images of the same Operating System, | ||||
|         ./genimage -a x86_64 -o rhels6.3 -p compute --permission 755 --srcdir /install/rhels6.3/x86_64 --pkglist \ | ||||
|          /opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.pkglist --otherpkgdir /install/post/otherpkgs/rhels6.3/x86_64 --postinstall \ | ||||
|          /opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall --rootimgdir /tmp/compute mycomputeimage | ||||
|   | ||||
|    Then copy the contents from ``/tmp/compute`` to ``/install/netboot/rhels6.3/compute``  | ||||
|  | ||||
|    Then copy the contents from ``/tmp/compute`` to ``/install/netboot/rhels6.3/compute`` | ||||
|  | ||||
|  | ||||
| #. Now return to the management node and execute ``packimage`` on the osimage and continue provisioning the node :: | ||||
|   | ||||
| @@ -8,8 +8,8 @@ Troubleshooting | ||||
|  | ||||
|     Error: Unable to find pxelinux.0 at /opt/xcat/share/xcat/netboot/syslinux/pxelinux.0 | ||||
|  | ||||
| **Resolution:**  | ||||
| **Resolution:** | ||||
|  | ||||
| The syslinux network booting files are missing.   | ||||
| The syslinux network booting files are missing. | ||||
| Install the syslinux-xcat package provided in the xcat-deps repository: ``yum -y install syslinux-xcat`` | ||||
|  | ||||
|   | ||||
| @@ -4,4 +4,4 @@ x86 Management Node | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|     | ||||
|  | ||||
|   | ||||
| @@ -9,9 +9,9 @@ It is recommended that lldp protocol in the switches is enabled to collect the s | ||||
|  | ||||
| xCAT will use the ethernet switches during node discovery to find out which switch port a particular MAC address is communicating over. This allows xCAT to match a random booting node with the proper node name in the database. To set up a switch, give it an IP address on its management port and enable basic **SNMP** functionality. (Typically, the **SNMP** agent in the switches is disabled by default.) The easiest method is to configure the switches to give the **SNMP** version 1 community string called "public" read access. This will allow xCAT to communicate to the switches without further customization. (xCAT will get the list of switches from the **switch** table.) If you want to use **SNMP** version 3 (e.g. for better security), see the example below. With **SNMP** V3 you also have to set the user/password and AuthProto (default is **md5**) in the switches table. | ||||
|  | ||||
| If for some reason you can't configure **SNMP** on your switches, you can use sequential discovery or the more manual method of entering the nodes' MACs into the database.  | ||||
| If for some reason you can't configure **SNMP** on your switches, you can use sequential discovery or the more manual method of entering the nodes' MACs into the database. | ||||
|  | ||||
| **SNMP** V3 Configuration example:    | ||||
| **SNMP** V3 Configuration example: | ||||
|  | ||||
| xCAT supports many switch types, such as **BNT** and **Cisco**. Here is an example of configuring **SNMP V3** on the **Cisco** switch 3750/3650: | ||||
|  | ||||
| @@ -62,14 +62,14 @@ Switch Management | ||||
|  | ||||
| When managing Ethernet switches, the admin often logs into the switches one by one using SSH or Telnet and runs the switch commands. However, it becomes time consuming when there are a lot of switches in a cluster. In a very large cluster, the switches are often identical and the configurations are identical. It helps to configure and monitor them in parallel from a single command. | ||||
|  | ||||
| For managing Mellanox IB switches and  Qlogic IB switches, see :doc:`Mellanox IB switches and Qlogic IB switches </advanced/networks/infiniband/index>`  | ||||
| For managing Mellanox IB switches and  Qlogic IB switches, see :doc:`Mellanox IB switches and Qlogic IB switches </advanced/networks/infiniband/index>` | ||||
|  | ||||
| xCAT will not do a lot of switch management functions. Instead, it will configure the switch so that the admin can run remote command such as ``xdsh`` for it. Thus, the admin can use the ``xdsh`` to run proprietary switch commands remotely from the xCAT mn to enable **VLAN**, **bonding**, **SNMP** and others. | ||||
|  | ||||
| Running Remote Commands in Parallel | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| You can use xdsh to run parallel commands on Ethernet switches. The following shows how to configure xCAT to run xdsh on the switches:  | ||||
| You can use xdsh to run parallel commands on Ethernet switches. The following shows how to configure xCAT to run xdsh on the switches: | ||||
|  | ||||
| .. note:: For this to work, configure the switch to allow **ssh** or **telnet**. The procedure varies from switch to switch, consult the reference guides for your switch to find out how to do this. | ||||
|  | ||||
| @@ -82,7 +82,7 @@ Set the ssh or telnet username an d password. :: | ||||
|        chdef bntc125 username=admin \ | ||||
|                      password=password \ | ||||
|                      protocol=ssh | ||||
|        or  | ||||
|        or | ||||
|        chdef bntc125 username=admin \ | ||||
|                      password=password \ | ||||
|                      protocol=telnet | ||||
| @@ -101,8 +101,8 @@ Set the ssh or telnet username an d password. :: | ||||
|  | ||||
| Also note that --devicetype is used here. xCAT supports the following switch types out of the box: :: | ||||
|  | ||||
|              * BNT  | ||||
|              * Cisco  | ||||
|              * BNT | ||||
|              * Cisco | ||||
|              * Juniper | ||||
|              * Mellanox (for IB and Ethernet switches) | ||||
|  | ||||
| @@ -121,7 +121,7 @@ Here is what result will look like: :: | ||||
|        bntc125: end | ||||
|        bntc125: show vlan | ||||
|        bntc125: VLAN                Name                Status            Ports | ||||
|        bntc125:  ----  --------------------------------  ------  ------------------------  | ||||
|        bntc125:  ----  --------------------------------  ------  ------------------------ | ||||
|        bntc125:  1     Default VLAN                      ena     45-XGE4 | ||||
|        bntc125:  3     VLAN 3                            dis     empty | ||||
|        bntc125:  101   xcatpriv101                       ena     24-44 | ||||
|   | ||||
| @@ -39,4 +39,4 @@ Below are the information ``getadapter`` trying to inspect: | ||||
|  | ||||
| * **candidatename**: All the names which satisfy the predictable network device naming scheme. If customers need to customize their network adapter name, they can choose one of them. (``confignetwork`` needs to do more work to support this. If customers want to use their own name, xCAT should offer an interface to get the customer’s input and change this column.) | ||||
|  | ||||
| * **linkstate**:  The link state of network device | ||||
| * **linkstate**:  The link state of network device | ||||
|   | ||||
| @@ -21,9 +21,9 @@ Burn new firmware on each ibaX: :: | ||||
|  | ||||
| 	mstflint -d 0002:01:00.0 -i <image location> b | ||||
|  | ||||
| .. note:: If this is a PureFlex MezzanineP adapter, you must select the correct image for each ibaX device.  | ||||
| .. note:: If this is a PureFlex MezzanineP adapter, you must select the correct image for each ibaX device. | ||||
|  | ||||
|           The difference in the firmware image at the end of the file name:  | ||||
|           The difference in the firmware image at the end of the file name: | ||||
|             * _0.bin (iba0/iba2) | ||||
|             * _1.bin (iba1/iba3) | ||||
|  | ||||
| @@ -96,7 +96,7 @@ Save the changes made for new IB image: :: | ||||
| 	configuration write | ||||
|  | ||||
| Activate the new IB image (reboot switch): :: | ||||
|        | ||||
|  | ||||
| 	reload | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,11 +1,11 @@ | ||||
| Configuration | ||||
| ============= | ||||
|  | ||||
| The process to configure the osimage to install the Mellanox OFED Drivers for Diskful and Diskless scenarios is outlined below.  | ||||
| The process to configure the osimage to install the Mellanox OFED Drivers for Diskful and Diskless scenarios is outlined below. | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    mlnxofed_ib_install_v2_diskful.rst | ||||
|    mlnxofed_ib_install_v2_diskless.rst | ||||
|     | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| Mellanox OFED Installation Script | ||||
| ================================= | ||||
|  | ||||
| Mellanox provides a tested and packaged version of the OpenFabrics Enterprise Distribution (OFED) driver, named Mellanox OFED (MLNX_OFED).  To assist with the installation of the MLNX_OFED driver, xCAT provides a sample postscript: ``mlnxofed_ib_install.v2``.  | ||||
| Mellanox provides a tested and packaged version of the OpenFabrics Enterprise Distribution (OFED) driver, named Mellanox OFED (MLNX_OFED).  To assist with the installation of the MLNX_OFED driver, xCAT provides a sample postscript: ``mlnxofed_ib_install.v2``. | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
| @@ -10,4 +10,4 @@ Mellanox provides a tested and packaged version of the OpenFabrics Enterprise Di | ||||
|    mlnxofed_configuration.rst | ||||
|    mlnxofed_ib_verified_scenario_matrix.rst | ||||
|    mlnxofed_ib_known_issue.rst | ||||
|     | ||||
|  | ||||
|   | ||||
| @@ -11,16 +11,16 @@ Diskful Installation | ||||
|  | ||||
| #. Configure the ``mlnxofed_ib_install`` script to install the MLNX_OFED drivers | ||||
|  | ||||
|    xCAT has a concept of postscripts that can be used to customize the node after the operating system is installed.   | ||||
|    xCAT has a concept of postscripts that can be used to customize the node after the operating system is installed. | ||||
|  | ||||
|    Mellanox recommends that the operating system is rebooted after the drivers are installed, so xCAT recommends using the ``postscripts`` attribute to avoid the need for a second reboot.  To invoke the ``mlnxofed_ib_install`` as a postscript :: | ||||
|   | ||||
|        chdef -t node -o <node_name> \  | ||||
|  | ||||
|        chdef -t node -o <node_name> \ | ||||
|           -p postscripts="mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso>" | ||||
|  | ||||
|    **[kernel mismatch issue]** The Mellanox OFED ISO is built against a series of specific kernel versions.  If the version of the linux kernel does not match any of the Mellanox offered pre-built kernel modules, you can pass the ``--add-kernel-support --force`` argument to the Mellanox installation script to build the kernel modules based on the version you are using. :: | ||||
|  | ||||
|        chdef -t node -o <node_name> \  | ||||
|        chdef -t node -o <node_name> \ | ||||
|           -p postscripts="mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso> \ | ||||
|           -m --add-kernel-support --force -end-" | ||||
|  | ||||
| @@ -37,8 +37,8 @@ Diskful Installation | ||||
|          service openibd status | ||||
|  | ||||
|      systemd: :: | ||||
|      | ||||
|          systemctl status openibd.service  | ||||
|  | ||||
|          systemctl status openibd.service | ||||
|  | ||||
|    * Verify that the Mellanox IB drivers are located at: ``/lib/modules/<kernel_version>/extra/`` | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| Diskless Installation | ||||
| ===================== | ||||
|  | ||||
| #. Prepare dependency packages in the pkglist  | ||||
| #. Prepare dependency packages in the pkglist | ||||
|  | ||||
|    In order for the Mellanox installation script to execute successfully, certain dependency packages are required to be installed on the compute node.  xCAT provides sample package list files to help resolve these dependencies.  The samples are located at ``/opt/xcat/share/xcat/ib/netboot/<os>/``. | ||||
|  | ||||
| @@ -11,26 +11,26 @@ Diskless Installation | ||||
|  | ||||
| #. Configure the ``mlnxofed_ib_install`` script to install the MLNX_OFED drivers | ||||
|  | ||||
|    Edit the ``postinstall`` script on the osimage to invoke the ``mlnxofed_ib_install`` install script.   | ||||
|    Edit the ``postinstall`` script on the osimage to invoke the ``mlnxofed_ib_install`` install script. | ||||
|  | ||||
|        For example, take ``rhels7.2-ppc64le-netboot-compute``:  | ||||
|        For example, take ``rhels7.2-ppc64le-netboot-compute``: | ||||
|  | ||||
|            #. Find the path to the ``postinstall`` script: :: | ||||
|  | ||||
|            #. Find the path to the ``postinstall`` script: ::  | ||||
|      | ||||
|                   # lsdef -t osimage -o rhels7.2-ppc64le-netboot-compute -i postinstall | ||||
|                   Object name: rhels7.2-ppc64le-netboot-compute | ||||
|                       postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall | ||||
|  | ||||
|            #. Edit the ``/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall`` and add the following: :: | ||||
|      | ||||
|  | ||||
|                   /install/postscripts/mlnxofed_ib_install \ | ||||
|                      -p /install/<path-to>/<MLNX_OFED_LINUX.iso> -i $1 -n genimage | ||||
|      | ||||
|  | ||||
|               .. note:: The ``$1`` is an argument that is passed to the postinstall script at runtime. | ||||
|  | ||||
|    .. tip:: **Kernel Mismatch**  | ||||
|        | ||||
|             The Mellanox OFED ISO is built against a series of specific kernel versions. If the version of the linux kernel being used does not match any of the pre-built kernels, pass ``--add-kernel-support --without-32bit --without-fw-update --force`` to the Mellanox installation script to build the kernel modules based on the kernel you are using.  Example: ::  | ||||
|    .. tip:: **Kernel Mismatch** | ||||
|  | ||||
|             The Mellanox OFED ISO is built against a series of specific kernel versions. If the version of the linux kernel being used does not match any of the pre-built kernels, pass ``--add-kernel-support --without-32bit --without-fw-update --force`` to the Mellanox installation script to build the kernel modules based on the kernel you are using.  Example: :: | ||||
|  | ||||
|                  /install/postscripts/mlnxofed_ib_install \ | ||||
|                  -p /install/<path-to>/<MLNX_OFED_LINUX.iso> -m --add-kernel-support --without-32bit --without-fw-update --force -end- \ | ||||
| @@ -39,7 +39,7 @@ Diskless Installation | ||||
| #. Generate the diskless image  | ||||
|  | ||||
|    Use the ``genimage`` command to generate the diskless image from the osimage definition :: | ||||
|          | ||||
|  | ||||
| 	genimage <osimage> | ||||
|  | ||||
|    Use the ``packimage`` command to pack the diskless image for deployment :: | ||||
| @@ -49,7 +49,7 @@ Diskless Installation | ||||
| #. Provision the node :: | ||||
|  | ||||
|      rinstall <node> osimage=rhels7.2-ppc64le-netboot-compute | ||||
|     | ||||
|  | ||||
| #. Verification | ||||
|  | ||||
|    * Check the status of ``openibd`` service | ||||
| @@ -59,8 +59,8 @@ Diskless Installation | ||||
|          service openibd status | ||||
|  | ||||
|      systemd: :: | ||||
|      | ||||
|          systemctl status openibd.service  | ||||
|  | ||||
|          systemctl status openibd.service | ||||
|  | ||||
|    * Verify that the Mellanox IB drivers are located at: ``/lib/modules/<kernel_version>/extra/`` | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ Preparation | ||||
| Download MLNX_OFED ISO | ||||
| ---------------------- | ||||
|  | ||||
| **xCAT only supports installation using the ISO format.**  | ||||
| **xCAT only supports installation using the ISO format.** | ||||
|  | ||||
| Download the Mellanox OFED ISO file `here (MLNX_OFED) <http://www.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers>`_. | ||||
|  | ||||
| @@ -22,7 +22,7 @@ The ``mlnxofed_ib_install.v2`` is a sample script intended to assist with the in | ||||
|        # ensure the script has execute permission | ||||
|        chmod +x /install/postscripts/mlnxofed_ib_install | ||||
|  | ||||
| #. Familiarize yourself with the options available for the xCAT ``mlnxofed_ib_install`` script.  | ||||
| #. Familiarize yourself with the options available for the xCAT ``mlnxofed_ib_install`` script. | ||||
|  | ||||
|    +---------+------------------+----------------------------------------------------------+ | ||||
|    | Option  | Required         | Description                                              | | ||||
| @@ -55,5 +55,5 @@ The ``mlnxofed_ib_install.v2`` is a sample script intended to assist with the in | ||||
|    To pass the ``--add-kernel-support`` option to ``mlnxofedinstall``, use the following command: :: | ||||
|  | ||||
|        /install/postscripts/mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso> \ | ||||
|            -m --without-32bit --without-fw-update --add-kernel-support --force -end-  | ||||
|            -m --without-32bit --without-fw-update --add-kernel-support --force -end- | ||||
|  | ||||
|   | ||||
| @@ -4,16 +4,16 @@ Known Issues | ||||
| Preventing upgrade of the Mellanox Drivers | ||||
| ------------------------------------------ | ||||
|  | ||||
| On RedHat operating systems, after the Mellanox drivers are installed, you may have a requirement to update your operating system to a later version.  | ||||
| Some operating systems may ship InfiniBand drivers that are higher version than the Mellanox drivers you have installed and therefore may update the existing drivers.  | ||||
| On RedHat operating systems, after the Mellanox drivers are installed, you may have a requirement to update your operating system to a later version. | ||||
| Some operating systems may ship InfiniBand drivers that are higher version than the Mellanox drivers you have installed and therefore may update the existing drivers. | ||||
|  | ||||
| To prevent this from happening, add the following in the ``/etc/yum.conf`` :: | ||||
|  | ||||
|     exclude=dapl* libib* ibacm infiniband* libmlx* librdma* opensm* ibutils* | ||||
|  | ||||
|  | ||||
| Development packages in SLES  | ||||
| Development packages in SLES | ||||
| ---------------------------- | ||||
|  | ||||
| If using the ``--add-kernel-support`` attribute on SLES operating systems, you may find problems with installing some dependency packages which are not shipped by the SLES server DVDs.  The development rpms are provided by the SDK DVDs.  Refer to :doc:`Add Additional Software Packages </guides/admin-guides/manage_clusters/ppc64le/diskful/customize_image/additional_pkg>` to configure the SDK repositories.  | ||||
| If using the ``--add-kernel-support`` attribute on SLES operating systems, you may find problems with installing some dependency packages which are not shipped by the SLES server DVDs.  The development rpms are provided by the SDK DVDs.  Refer to :doc:`Add Additional Software Packages </guides/admin-guides/manage_clusters/ppc64le/diskful/customize_image/additional_pkg>` to configure the SDK repositories. | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| MLNX_OFED Support Matrix | ||||
| ======================== | ||||
|  | ||||
| The following ISO images and attributes have been verified by the xCAT Team.  | ||||
| The following ISO images and attributes have been verified by the xCAT Team. | ||||
|  | ||||
| **RedHat Enterprise Linux** | ||||
|  | ||||
|   | ||||
| @@ -11,7 +11,7 @@ If your target Mellanox IB adapter has 2 ports, and you plan to give port ib0 4 | ||||
|  | ||||
| 1. Define your networks in networks table :: | ||||
|  | ||||
| 	chdef -t network -o ib0ipv41 net=20.0.0.0 mask=255.255.255.0 mgtifname=ib0  | ||||
| 	chdef -t network -o ib0ipv41 net=20.0.0.0 mask=255.255.255.0 mgtifname=ib0 | ||||
| 	chdef -t network -o ib0ipv42 net=30.0.0.0 mask=255.255.255.0 mgtifname=ib0 | ||||
| 	chdef -t network -o ib0ipv61 net=1:2::/64 mask=/64 mgtifname=ib0 gateway=1:2::2 | ||||
| 	chdef -t network -o ib0ipv62 net=2:2::/64 mask=/64 mgtifname=ib0 gateway= | ||||
|   | ||||
| @@ -18,7 +18,7 @@ Add the login user name and password to the switches table: :: | ||||
|  | ||||
| The switches table will look like this: :: | ||||
|  | ||||
| 	#switch,...,sshusername,sshpassword,switchtype,....   | ||||
| 	#switch,...,sshusername,sshpassword,switchtype,.... | ||||
| 	"mswitch",,,,,,,"admin","admin","MellanoxIB",, | ||||
|  | ||||
| If there is only one admin and one password for all the switches then put the entry in the xCAT passwd table for the admin id and password to use to login. :: | ||||
|   | ||||
| @@ -75,4 +75,4 @@ If it is not running, then run the following commands: :: | ||||
| 	monadd snmpmon | ||||
| 	monstart snmpmon | ||||
|  | ||||
| 	 | ||||
| 	 | ||||
|   | ||||
| @@ -1,13 +1,13 @@ | ||||
| Open Network Install Environment Switches  | ||||
| Open Network Install Environment Switches | ||||
| ========================================= | ||||
|  | ||||
| The Open Network Install Environment, or "ONIE" [1]_, is an open source project defining an **install environment** for bare metal switches.  This environment allows choice for the end users when selecting a network operating system to install onto these bare metal switches.   | ||||
| The Open Network Install Environment, or "ONIE" [1]_, is an open source project defining an **install environment** for bare metal switches.  This environment allows choice for the end users when selecting a network operating system to install onto these bare metal switches. | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    os_cumulus/index.rst  | ||||
|    os_cumulus/ztp.rst  | ||||
|    os_cumulus/index.rst | ||||
|    os_cumulus/ztp.rst | ||||
|  | ||||
|  | ||||
| .. [1] Open Network Install Environment: Created by Cumulus Networks, Inc. in 2012, the Open Network Install Environment (ONIE) Project is a small operating system, pre-installed as firmware on bare metal network switches, that provides an environment for automated operating system provisioning. | ||||
|   | ||||
| @@ -10,18 +10,18 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI | ||||
|  | ||||
| #. Create a pre-defined switch definition for the ONIE switch using the ``onieswitch`` template. | ||||
|  | ||||
|    The mac address of the switch management port is required for xCAT to configure the DHCP information and send over the OS to install on the switch.  | ||||
|    The mac address of the switch management port is required for xCAT to configure the DHCP information and send over the OS to install on the switch. | ||||
|  | ||||
|    **Small Clusters**  | ||||
|    **Small Clusters** | ||||
|  | ||||
|    If you know the mac address of the management port on the switch, create the pre-defined switch definition providing the mac address. :: | ||||
|  | ||||
|        mkdef frame01sw1 --template onieswitch arch=armv7l \ | ||||
|            ip=192.168.1.1 mac="aa:bb:cc:dd:ee:ff" | ||||
|  | ||||
|    **Large Clusters**  | ||||
|    **Large Clusters** | ||||
|  | ||||
|    xCAT's :doc:`switchdiscover </guides/admin-guides/references/man1/switchdiscover.1>` command can be used to discover the mac address and fill in the predefined switch definitions based on the switch/switchport mapping.   | ||||
|    xCAT's :doc:`switchdiscover </guides/admin-guides/references/man1/switchdiscover.1>` command can be used to discover the mac address and fill in the predefined switch definitions based on the switch/switchport mapping. | ||||
|  | ||||
|  | ||||
|     #. Define all the switch objects providing the switch/switchport mapping: :: | ||||
| @@ -34,8 +34,8 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI | ||||
|              ip=192.168.3.1 switch=coresw1 switchport=3 | ||||
|          mkdef frame04sw1 --template onieswitch arch=armv7l \ | ||||
|              ip=192.168.4.1 switch=coresw1 switchport=4 | ||||
|          ...  | ||||
|    | ||||
|          ... | ||||
|  | ||||
|     #. Leverage ``switchdiscover`` over the DHCP range to automatically detect the MAC address and write them into the predefined switches above. :: | ||||
|  | ||||
|          switchdiscover --range <IP range> | ||||
| @@ -43,9 +43,9 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI | ||||
|  | ||||
| #. Run the ``nodeset`` command to set the ``provmethod`` attribute of the target switch(es) to the Cumulus Linux install image and prepare the DHCP/BOOTP lease information for the switch:  :: | ||||
|  | ||||
|     # nodeset frame01sw1 osimage=cumulus3.5.2-armel  | ||||
|     # nodeset frame01sw1 osimage=cumulus3.5.2-armel | ||||
|     # lsdef frame01sw1 | ||||
|     Object name: frame01sw1  | ||||
|     Object name: frame01sw1 | ||||
|     arch=armv7l | ||||
|     groups=switch,edge_switch | ||||
|     ip=172.21.208.03 | ||||
| @@ -82,11 +82,11 @@ Configure xCAT Remote Commands | ||||
|  | ||||
| After Cumulus Linux OS is installed, a default user ``cumulus`` will be created with default password: ``CumulusLinux!``. | ||||
|  | ||||
| To ease in the management of the switch, xCAT provides a script to help configure password-less ssh as the ``root`` user.  This script sends over the xCAT ssh keys so that the xCAT remote commands (``xdsh``, ``xdcp``, etc) can be run against the ONIE switches.   | ||||
| To ease in the management of the switch, xCAT provides a script to help configure password-less ssh as the ``root`` user.  This script sends over the xCAT ssh keys so that the xCAT remote commands (``xdsh``, ``xdcp``, etc) can be run against the ONIE switches. | ||||
|  | ||||
| Execute the following to sync the xCAT keys to the switch: :: | ||||
|  | ||||
|     /opt/xcat/share/xcat/scripts/configonie --switches frame01sw1 --ssh  | ||||
|     rspconfig frame01sw1 sshcfg  | ||||
|  | ||||
| Validate the ssh keys are correctly configured by running a ``xdsh`` command: :: | ||||
|  | ||||
| @@ -98,7 +98,7 @@ Validate the ssh keys are correctly configured by running a ``xdsh`` command: :: | ||||
| Activate the License | ||||
| -------------------- | ||||
|  | ||||
| After Cumulus Linux OS is installed onto the ONIE switch, only the serial port console and the management ethernet port are enabled.  To activate the rest of the switch ports, the license file needs to be installed onto the switch.  | ||||
| After Cumulus Linux OS is installed onto the ONIE switch, only the serial port console and the management ethernet port are enabled.  To activate the rest of the switch ports, the license file needs to be installed onto the switch. | ||||
|  | ||||
| #. Copy the license file to the switch: :: | ||||
|  | ||||
| @@ -159,6 +159,6 @@ To verify the SNMPv3 configuration, run ``xcatprobe switch_macmap`` command, wil | ||||
|     ...........................more output..................... | ||||
|  | ||||
|  | ||||
|   | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user