
Merge branch 'master' into sle15support

Yuan Bai 2018-08-20 17:10:12 +08:00 committed by GitHub
commit d94406edae
1645 changed files with 23564 additions and 22975 deletions

View File

@ -1,12 +1,12 @@
os: linux
dist: trusty
sudo: required
before_install:
- sudo apt-get install -y git reprepro devscripts debhelper libsoap-lite-perl libdbi-perl quilt openssh-server dpkg looptools genometools software-properties-common
- perl -v
- echo "yes" | sudo cpan -f -i Capture::Tiny
script:
- echo $TRAVIS_BUILD_ID
- echo $TRAVIS_EVENT_TYPE
- echo $TRAVIS_BUILD_NUMBER

View File

@ -8,7 +8,7 @@
# Getting Started:
# - Clone the xcat-core git repository under a directory named "xcat-core/src"
# - make sure reprepro is installed on the build machine
# - Run this script from the local git repository you just created.
# ./build-ubunturepo -c BUILDALL=1
# Usage: attr=value attr=value ... ./build-ubunturepo { -c | -d }
@ -22,13 +22,12 @@
# When you are ready to release this build, use PROMOTE=1 without PREGA
# BUILDALL=1 - build all rpms, whether they changed or not. Should be used for snap builds that are in
# prep for a release.
# GPGSIGN=0 - Do not sign the repo at the end of the build. The repo is signed by default
#
# LOG=<filename> - provide a LOG file option to redirect some output into a log file
#
# DEST=<directory> - provide a directory to contain the build result
#
# BUILDDESTDIR=<build_tarball_dir> - Copy build core tarball to BUILDDESTDIR if defined
# For the dependency packages 1. All the xcat dependency deb packages should be uploaded to
# "pokgsa/projects/x/xcat/build/ubuntu/xcat-dep/debs/" on GSA
# 2. run ./build-ubunturepo -d
@ -105,6 +104,9 @@ if [ "$c_flag" -a "$d_flag" ];then
exit 2
fi
if [ -z "$BUILDALL" ]; then
BUILDALL=1
fi
# Find where this script is located to set some build variables
old_pwd=`pwd`
@ -332,7 +334,7 @@ __EOF__
if [ "$GPGSIGN" = "0" ];then
#echo "GPGSIGN=$GPGSIGN specified, the repo will not be signed"
echo "" >> conf/distributions
else
echo "SignWith: 5619700D" >> conf/distributions
echo "" >> conf/distributions
fi
@ -373,7 +375,7 @@ __EOF__
chmod 775 mklocalrepo.sh
#
# Add a buildinfo file into the tar.bz2 file to track information about the build
#
BUILDINFO=$local_core_repo_path/buildinfo
echo "VERSION=$ver" > $BUILDINFO
@ -399,14 +401,10 @@ __EOF__
chgrp root $tar_name
chmod g+w $tar_name
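# If DEST is specified, expose the tarball through a symlink in the parent directory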
if [ "$BUILDDESTDIR" ]; then
rm -rf $BUILDDESTDIR
mkdir -p $BUILDDESTDIR
cp $tar_name $BUILDDESTDIR
if [ -n "$DEST" ]; then
ln -sf $(basename `pwd`)/$tar_name ../$tar_name
if [ $? != 0 ]; then
echo "Failed to copy $tar_name to $BUILDDESTDIR"
else
echo "Copied $tar_name to $BUILDDESTDIR"
echo "ERROR: Failed to make symbol link $DEST/$tar_name"
fi
fi
@ -543,7 +541,7 @@ __EOF__
APT_DIR="${FRS}/xcat"
APT_REPO_DIR="${APT_DIR}/repos/apt/devel"
# Decide whether to upload the xcat-dep package or NOT (default is to NOT upload xcat-dep)
if [ "$UP" != "1" ]; then
echo "Upload not specified, Done! (rerun with UP=1, to upload)"
cd $old_pwd

View File

@ -32,8 +32,6 @@
# GPGSIGN/RPMSIGN=0 or GPGSIGN/RPMSIGN=1 - Sign the RPMs using the keys on GSA, the default is to sign the rpms without GPGSIGN/RPMSIGN specified
# DEST=<directory> - provide a directory to contain the build result
#
# BUILDDESTDIR=<build_tarball_dir> - Copy build core tarball to BUILDDESTDIR if defined
#
# The following environment variables can be modified if you need
#
@ -92,7 +90,7 @@ if [ -z "$RPMSIGN" ] && [ -z "$GPGSIGN" ]; then
elif [ -n "$GPGSIGN" ]; then # use GPGSIGN in first
RPMSIGN=$GPGSIGN
fi
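# Normalize RPMSIGN: treat any value other than "1" (including unset) as "0"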
if [ -z "$RPMSIGN" -o "$RPMSIGN" != "0" ]; then
if [ -z "$RPMSIGN" -o "$RPMSIGN" != "1" ]; then
RPMSIGN=0
fi
if [ -z "$BUILDALL" ]; then
@ -464,17 +462,20 @@ if [ "$OSNAME" != "AIX" ]; then
echo '%_signature gpg' >> $MACROS
fi
if ! $GREP '%_gpg_name' $MACROS 2>/dev/null; then
echo '%_gpg_name xCAT Automatic Signing Key' >> $MACROS
fi
echo "Signing RPMs..."
build-utils/rpmsign.exp `find $DESTDIR -type f -name '*.rpm'` | grep -v -E '(already contains identical signature|was already signed|rpm --quiet --resign|WARNING: standard input reopened)'
build-utils/rpmsign.exp $SRCDIR/*rpm | grep -v -E '(already contains identical signature|was already signed|rpm --quiet --resign|WARNING: standard input reopened)'
# RHEL5 is archaic. Use the default hash algorithm to do the checksum.
# Which is SHA-256 on RHEL6.
createrepo $DESTDIR
createrepo $SRCDIR
rm -f $SRCDIR/repodata/repomd.xml.asc
rm -f $DESTDIR/repodata/repomd.xml.asc
# Use the xCAT Automatic Signing Key to do the signing
gpg -a --detach-sign --default-key 5619700D $DESTDIR/repodata/repomd.xml
gpg -a --detach-sign --default-key 5619700D $SRCDIR/repodata/repomd.xml
if [ ! -f $DESTDIR/repodata/repomd.xml.key ]; then
${WGET_CMD} -q -P $DESTDIR/repodata $GSA/keys/repomd.xml.key
fi
@ -482,8 +483,8 @@ if [ "$OSNAME" != "AIX" ]; then
${WGET_CMD} -P $SRCDIR/repodata $GSA/keys/repomd.xml.key
fi
else
createrepo $DESTDIR
createrepo $SRCDIR
fi
fi
@ -585,14 +586,10 @@ fi
chgrp $SYSGRP $TARNAME
chmod g+w $TARNAME
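# If DEST is specified, expose the tarball through a symlink in the parent directory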
if [ "$BUILDDESTDIR" ]; then
rm -rf $BUILDDESTDIR
mkdir -p $BUILDDESTDIR
cp $TARNAME $BUILDDESTDIR
if [ -n "$DEST" ]; then
ln -sf $(basename `pwd`)/$TARNAME ../$TARNAME
if [ $? != 0 ]; then
echo "Failed to copy $TARNAME to $BUILDDESTDIR"
else
echo "Copied $TARNAME to $BUILDDESTDIR"
echo "ERROR: Failed to make symbol link $DEST/$TARNAME"
fi
fi

View File

@ -3,21 +3,21 @@
#
# Package up all the xCAT open source dependencies
# - creating the yum repos
# - tar up the deps package
#
# This script assumes that the individual rpms have already been compiled
# for the relevant architectures from the src & spec files in git.
#
# Dependencies:
# - createrepo command needs to be present on the build machine
#
# Usage: builddep.sh [attr=value attr=value ...]
# DESTDIR=<dir> - the dir to place the dep tarball in. The default is ../../../xcat-dep,
# relative to where this script is located.
# UP=0 or UP=1 - override the default upload behavior
# FRSYUM=0 - put the directory of individual rpms in the project web area instead
# of the FRS area.
# VERBOSE=1 - Set to 1 to see more VERBOSE output
# you can change this if you need to
USER=xcat
@ -45,7 +45,7 @@ fi
if [ ! -d $GSA ]; then
echo "ERROR: This script is intended to be used by xCAT development..."
echo "ERROR: The GSA directory ($GSA) directory does not appear to be mounted, cannot continue!"
echo "ERROR: The GSA directory ($GSA) directory does not appear to be mounted, cannot continue!"
exit 1
fi
@ -73,7 +73,7 @@ XCATCOREDIR=`/bin/pwd`
if [ -z "$DESTDIR" ]; then
# This is really a hack here because it depends on the build
# environment structure. However, it's not expected that
# users are building the xcat-dep packages
DESTDIR=../../xcat-dep
fi
@ -129,7 +129,7 @@ if [ "$OSNAME" != "AIX" ]; then
echo "===> Modifying the xcat-dep.repo files to point to the correct location..."
# 10/01/2015 - vkhu
# The URLs have been updated in GSA, this section is not needed at the moment
#
#if [ "$FRSYUM" != 0 ]; then
# newurl="$YUMREPOURL2"
# oldurl="$YUMREPOURL1"
@ -218,12 +218,12 @@ fi
# Get the permissions and group correct
if [ "$OSNAME" == "AIX" ]; then
# AIX
SYSGRP=system
YUM=aix
FRSDIR='2.x_AIX'
else
# Linux
SYSGRP=root
YUM=yum/devel
FRSDIR='2.x_Linux'
@ -232,9 +232,9 @@ chgrp -R -h $SYSGRP *
chmod -R g+w *
echo "===> Building the tarball..."
#
# Want to stay above xcat-dep so we can rsync the whole directory
# DO NOT CHANGE DIRECTORY AFTER THIS POINT!!
#
cd ..
pwd
@ -254,12 +254,12 @@ else
tar $verbosetar -jcf $DFNAME xcat-dep
fi
if [[ ${UP} -eq 0 ]]; then
echo "Upload not being done, set UP=1 to upload to xcat.org"
exit 0;
fi
# Upload the directory structure to xcat.org yum area (xcat/repos/yum).
if [ "$FRSYUM" != 0 ]; then
links="-L" # FRS does not support rsyncing sym links
else
@ -284,7 +284,7 @@ echo "Uploading README to $FRS/xcat-dep/$FRSDIR/ ..."
while [ $((i+=1)) -le 5 ] && ! rsync -v README $USER@$TARGET_MACHINE:$FRS/xcat-dep/$FRSDIR/
do : ; done
# For some reason the README is not updated
echo "Uploading README to $YUMDIR/$YUM/ ..."
while [ $((i+=1)) -le 5 ] && ! rsync -v README $USER@$TARGET_MACHINE:$YUMDIR/$YUM/
do : ; done

View File

@ -35,7 +35,7 @@ ls $CURDIR/makerpm
if [ $? -gt 0 ]; then
echo "Error:no repo exist, exit 1."
exit 1
fi
# Get a lock, so can not do 2 builds at once
@ -77,8 +77,8 @@ echo "This is an Ubuntu system"
fi
cd -
mv ${rpmname_low}* $CURDIR/build
done
done
#delete all files except .deb file
find $CURDIR/build/* ! -name *.deb | xargs rm -f
@ -97,31 +97,31 @@ echo "This is an $OSNAME system"
fi
mkdir -p $CURDIR/build/
#always build perl-xCAT
$CURDIR/makerpm perl-xCAT
# Build the rest of the noarch rpms
for rpmname in xCAT-client xCAT-server xCAT-IBMhpc xCAT-rmc xCAT-test xCAT-buildkit xCAT-vlan; do
if [ "$OSNAME" = "AIX" -a "$rpmname" = "xCAT-buildkit" ]; then continue; fi
if [ "$OSNAME" = "AIX" -a "$rpmname" = "xCAT-buildkit" ]; then continue; fi
$CURDIR/makerpm $rpmname
done
#build xCAT-genesis-scripts if it is x86_64 platform
ARCH=$(uname -p)
if [ "$ARCH" = "x86_64" ]; then
$CURDIR/makerpm xCAT-genesis-scripts x86_64
if [ "$ARCH" = "x86_64" ]; then
$CURDIR/makerpm xCAT-genesis-scripts x86_64
else
$CURDIR/makerpm xCAT-genesis-scripts ppc64
fi
# Build the xCAT and xCATsn rpms for all platforms
for rpmname in xCAT xCATsn; do
if [ "$OSNAME" = "AIX" ]; then
$CURDIR/makerpm $rpmname
if [ $? -ne 0 ]; then FAILEDRPMS="$FAILEDRPMS $rpmname"; fi
else
for arch in x86_64 ppc64 s390x; do
$CURDIR/makerpm $rpmname $arch
@ -133,7 +133,7 @@ echo "This is an $OSNAME system"
if [ "$OS" = "SUSE" ]; then
cp /usr/src/packages/RPMS/noarch/* $CURDIR/build/
cp /usr/src/packages/RPMS/x86_64/* $CURDIR/build/
cp /usr/src/packages/RPMS/ppc64/* $CURDIR/build/
else
cp /root/rpmbuild/RPMS/noarch/* $CURDIR/build/
cp /root/rpmbuild/RPMS/x86_64/* $CURDIR/build/
@ -158,7 +158,7 @@ EOF
rm -f /etc/zypp/repos.d/xcat-core.repo
zypper ar file://$CURDIR/build xcat-core
fi
fi

View File

@ -20,15 +20,15 @@ parser.add_option("--prefix", dest="PREFIX", help="Specify the location of the P
POD2RST="pod2rst"
def cmd_exists(cmd):
return subprocess.call("type " + cmd, shell=True,
stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
prefix_path = None
prefix_lib_path = None
if options.PREFIX:
if '~' in options.PREFIX:
# else assume full path is provided
prefix_path = os.path.expanduser(options.PREFIX)
else:
prefix_path = options.PREFIX
@ -39,13 +39,13 @@ if options.PREFIX:
sys.exit(1)
prefix_lib_path = "%s/lib" %(prefix_path)
if not os.path.isdir(prefix_lib_path):
prefix_lib_path = "%s/lib64" %(prefix_path)
if not os.path.isdir(prefix_lib_path):
print "ERROR, Cannot find the Perl lib directory in %s/lib or %s/lib64" %(prefix_path, prefix_path)
sys.exit(1)
else:
if not cmd_exists(POD2RST):
print "ERROR, %s requires pod2rst to continue!" %(os.path.basename(__file__))
parser.print_help()
@ -57,13 +57,13 @@ MANPAGE_DEST="./docs/source/guides/admin-guides/references/man"
#
# add the following to delete the generated files before creating them
# essentially this allows us to remove man pages and they will be
# removed in the generation
print "Cleaning up the generated man pages in %s" %(MANPAGE_DEST)
allfiles = glob("%s*/*.rst" %(MANPAGE_DEST))
for d in allfiles:
# Skip over the index.rst file
if not "index.rst" in d:
print "Removing file %s" %(d)
os.remove(d)
@ -93,18 +93,18 @@ def fix_double_dash(rst_file):
os.system(sed_cmd)
#remove intermediate .sed1 file
rm_sed1file_cmd = "rm %s.sed1" %(rst_file)
os.system(rm_sed1file_cmd)
build_db_man_pages()
# List the xCAT component directory which contain pod pages
COMPONENTS = ['xCAT-SoftLayer', 'xCAT-test', 'xCAT-client', 'xCAT-vlan', 'perl-xCAT', 'xCAT-buildkit']
for component in COMPONENTS:
for root,dirs,files in os.walk("%s" %(component)):
for file in files:
# only interested in .pod files
if file.endswith(".pod"):
pod_input = os.path.join(root,file)
@ -130,7 +130,7 @@ for component in COMPONENTS:
cmd = "perl -I %s/share/perl5 %s/bin/%s " %(prefix_path, prefix_path, POD2RST)
cmd += " --infile=%s --outfile=%s --title=%s.%s" %(pod_input, rst_output, title, man_ver)
# print cmd
os.system(cmd)
if man_ver == '1' or man_ver == '8':
fix_vertical_bar(rst_output)

View File

@ -10,11 +10,11 @@ The documentation project is written in restructured text (.rst) using Sphinx an
* Using pip, install or update sphinx (See: http://pip.readthedocs.org/)
```
pip install sphinx
```
or
```
pip install sphinx --upgrade
```
* Using pip, install ReadTheDocs theme
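The command block for this step is not shown in the hunk; presumably it installs the theme package (package name assumed here):
```
pip install sphinx_rtd_theme
```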

View File

@ -5,4 +5,4 @@ Questions & Answers
:maxdepth: 2
makehosts.rst

View File

@ -67,15 +67,15 @@ Q: How to configure aliases?
There are 3 methods to configure aliases:
#. Use ``hostnames`` in ``hosts`` table to configure aliases for the installnic.
#. If you want to use the script ``confignetwork`` to configure secondary NICs, it is suggested to use ``aliases`` in the ``nics`` table to configure aliases. Refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>`
#. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs and you don't want to use the script ``confignetwork`` to configure these NICs, it is suggested to use ``otherinterfaces`` in the ``hosts`` table to configure aliases. Refer to the following example:
* If you want to add ``node1-hd`` ``20.1.1.1`` in ``hosts`` table, and don't use ``confignetwork`` to configure it, you can add ``otherinterfaces`` like this: ::
chdef node1 otherinterfaces="node1-hd:20.1.1.1"
* After executing ``makehosts -n``, you can get records in ``/etc/hosts`` like following: ::
20.1.1.1 node1-hd
**Note**: If suffixes or aliases for the same IP are configured in both the ``hosts`` table and the ``nics`` table, conflicts will occur. ``makehosts`` will use the values from the ``nics`` table; the values from the ``nics`` table will overwrite those from the ``hosts`` table when creating ``/etc/hosts`` records.
@ -90,7 +90,7 @@ You can follow the best practice example.
* There are 2 networks in different domains: ``mgtnetwork`` and ``pubnetwork``
* ``mgtnetwork`` is xCAT management network
* Generate 2 records with the same hostname in ``/etc/hosts``, like: ::
10.5.106.101 node1.cluster.com
192.168.20.101 node1.public.com
@ -101,11 +101,11 @@ You can follow the best practice example.
#. Add networks entry in ``networks`` table: ::
chdef -t network mgtnetwork net=10.0.0.0 mask=255.0.0.0 domain=cluster.com
chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com
#. Create ``node1`` with ``ip=10.5.106.101``, xCAT can manage and install this node: ::
chdef node1 ip=10.5.106.101 groups=all
#. Create ``node1-pub`` with ``ip=192.168.30.101``, this node is only used to generate ``/etc/hosts`` records for public network, can use ``_unmanaged`` group name to label it: ::
@ -114,9 +114,9 @@ You can follow the best practice example.
#. Execute ``makehosts -n`` to generate ``/etc/hosts`` records: ::
makehosts -n
#. Check results in ``/etc/hosts``: ::
10.5.106.101 node1 node1.cluster.com
192.168.30.101 node1-pub node1.public.com

View File

@ -66,15 +66,15 @@ Q: How to configure aliases?
There are 3 methods to configure aliases:
#. Use ``hostnames`` in ``hosts`` table to configure aliases for the installnic.
#. If you want to use the script ``confignetwork`` to configure secondary NICs, it is suggested to use ``aliases`` in the ``nics`` table to configure aliases; you can refer to :doc:`Configure Aliases <../guides/admin-guides/manage_clusters/common/deployment/network/cfg_network_aliases>`
#. If you want to generate aliases records in ``/etc/hosts`` for secondary NICs, and don't want to use the script ``confignetwork`` to configure these NICs, it is suggested to use ``otherinterfaces`` in the ``hosts`` table to configure aliases. You can refer to the following example:
* If you want to add ``node1-hd`` ``20.1.1.1`` in ``hosts`` table, and don't use ``confignetwork`` to configure it, you can add ``otherinterfaces`` like this: ::
chdef node1 otherinterfaces="node1-hd:20.1.1.1"
* After executing ``makehosts -n``, you can get records in ``/etc/hosts`` like following: ::
20.1.1.1 node1-hd
**Note**: If suffixes or aliases for the same IP are configured in both the ``hosts`` table and the ``nics`` table, conflicts will occur. ``makehosts`` will use the values from the ``nics`` table; the values from the ``nics`` table will overwrite those from the ``hosts`` table when creating ``/etc/hosts`` records.
@ -89,7 +89,7 @@ You can follow the best practice example.
* There are 2 networks in different domains: ``mgtnetwork`` and ``pubnetwork``
* ``mgtnetwork`` is xCAT management network
* Generate 2 records with the same hostname in ``/etc/hosts``, like: ::
10.5.106.101 node1.cluster.com
192.168.20.101 node1.public.com
@ -100,11 +100,11 @@ You can follow the best practice example.
#. Add networks entry in ``networks`` table: ::
chdef -t network mgtnetwork net=10.0.0.0 mask=255.0.0.0 domain=cluster.com
chdef -t network pubnetwork net=192.168.30.0 mask=255.255.255.0 domain=public.com
#. Create ``node1`` with ``ip=10.5.106.101``, xCAT can manage and install this node: ::
chdef node1 ip=10.5.106.101 groups=all
#. Create ``node1-pub`` with ``ip=192.168.30.101``, this node is only used to generate ``/etc/hosts`` records for public network, can use ``_unmanaged`` group name to label it: ::
@ -113,9 +113,9 @@ You can follow the best practice example.
#. Execute ``makehosts -n`` to generate ``/etc/hosts`` records: ::
makehosts -n
#. Check results in ``/etc/hosts``: ::
10.5.106.101 node1 node1.cluster.com
192.168.30.101 node1-pub node1.public.com

View File

@ -7,8 +7,8 @@ The chain table (``tabdump chain``) is an xCAT database table that holds the cha
* currchain
* chain
To understand how these three attributes are used, refer to the picture:
.. image:: chain_tasks_logic.png

View File

@ -18,14 +18,14 @@ Currently only the ``bmcsetup`` command is officially supplied by xCAT to run to
runimage=<URL>
**URL** is a string which can be run by ``wget`` to download the image from the URL. The example could be: ::
runimage=http://<IP of xCAT Management Node>/<dir>/image.tgz
The ``image.tgz`` **must** have the following properties:
* Created using the ``tar zcvf`` command
* The tarball must include a ``runme.sh`` script to initiate the execution of the runimage
To create your own image, reference :ref:`creating image for runimage <create_image_for_runimage>`.
**Tip**: You could try to run ``wget http://<IP of xCAT Management Node>/<dir>/image.tgz`` manually to make sure the path has been set correctly.
@ -41,5 +41,5 @@ Causes the genesis kernel to create a shell for the administrator to log in and
* standby
Causes the genesis kernel to go into standby and wait for tasks from the chain. ...

View File

@ -14,7 +14,7 @@ How to prepare a image for ``runimage`` in ``chain``
* go to the directory and run `tar -zcvf <image> .`
* Example
This example shows how to install an independent package a.rpm
* Create the directory for the image: ::
@ -29,7 +29,7 @@ How to prepare a image for ``runimage`` in ``chain``
cat runme.sh
echo "start installing a.rpm"
rpm -ivh a.rpm
* modify the runme.sh script permission: ::

View File

@ -12,4 +12,4 @@ The **chain** mechanism is implemented in xCAT genesis system. The genesis is a
chain_tasks.rst
run_tasks_during_discovery.rst
run_tasks_to_configure.rst

View File

@ -4,5 +4,5 @@ Compute Node
.. toctree::
:maxdepth: 2
changing_hostname_ip.rst
replace/index.rst

View File

@ -4,7 +4,7 @@ OpenPOWER Nodes
When compute nodes are physically replaced in the frame, leverage xCAT to re-discover the compute nodes. The following guide can be used for:
* IBM OpenPOWER S822LC for HPC
#. Identify the machine(s) to be replaced: ``frame10cn02``.
@ -15,24 +15,24 @@ When compute nodes are physically replaced in the frame, leverage xCAT to re-dis
#. Set the outgoing machine to ``offline`` and remove attributes of the machine: ::
nodeset frame10cn02 offline
chdef frame10cn02 mac=""
#. If using **MTMS**-based discovery, fill in the Model-Type and Serial Number for the machine: ::
chdef frame10cn02 mtm=8335-GTB serial=<NEW SERIAL NUMBER>
#. If using **SWITCH**-based discovery, go on to the next step. The ``switch`` and ``switch-port`` should already be set in the compute node definition.
Node attributes will be replaced during the discovery process (mtm, serial, mac, etc.)
#. Search for the new BMC in the open range: ::
bmcdiscover --range <IP open range> -w -z
#. When the BMC is found, start the discovery with the following commands: ::
rsetboot /node-8335.* net
rpower /node-8335.* boot

View File

@ -4,7 +4,7 @@ Cluster Maintenance
.. toctree::
:maxdepth: 2
compute_node/index.rst
mgmt_node/index.rst
service_node/index.rst
sw_fw_inventory.rst

View File

@ -70,7 +70,7 @@ Nothing to do.
PostgreSQL
^^^^^^^^^^
- Edit ``/etc/xcat/cfgloc`` file...
Replace ``Pg:dbname=xcatdb;host=<old_MN_ip>|xcatadm|xcat20`` with ``Pg:dbname=xcatdb;host=<new_MN_ip>|xcatadm|xcat20``.
@ -81,7 +81,7 @@ PostgreSQL
MySQL
^^^^^
- Edit ``/etc/xcat/cfgloc``...
Replace ``mysql:dbname=xcatdb;host=<old_MN_ip>|xcatadmin|xcat20`` with ``mysql:dbname=xcatdb;host=<new_MN_ip>|xcatadmin|xcat20``
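Either edit can be scripted; a minimal sketch, assuming the old and new management node addresses are 10.6.0.1 and 10.6.0.2 (placeholders, adjust for your site): ::

    sed -i 's/host=10.6.0.1/host=10.6.0.2/' /etc/xcat/cfgloc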
Start the database
@ -151,7 +151,7 @@ For example, the old IP address was "10.6.0.1"
chdef -t node cn1-cn4 conserver=<new_ip_address>
* Repeat the same process for the other attributes containing the old IP address.
Change networks table
^^^^^^^^^^^^^^^^^^^^^
@ -178,7 +178,7 @@ If the old address still exists in the ``*.csv`` file, you can edit this file, t
Generate SSL credentials(optional)
----------------------------------
Use the following command to generate new SSL credentials: ``xcatconfig -c``.
Then update the following in xCAT:

View File

@ -4,4 +4,4 @@ Management Node
.. toctree::
:maxdepth: 2
changing_hostname_ip.rst

View File

@ -4,4 +4,4 @@ Service Node
.. toctree::
:maxdepth: 2
changing_hostname_ip.rst

View File

@ -5,7 +5,7 @@ Starting the confetty client
As the root user, running ``/opt/confluent/bin/confetty`` will open the confetty prompt ::
[root@c910f02c05p03 ~]# /opt/confluent/bin/confetty
/ ->
Creating a non root user
========================
@ -26,19 +26,19 @@ It's recommended to create a non-root user to connect to confetty
password="********"
Connecting to a remote server
=============================
In order to do remote sessions, keys must first be added to ``/etc/confluent``
* /etc/confluent/privkey.pem - private key
* /etc/confluent/srvcert.pem - server cert
If you want to use the xCAT Keys, you can simply copy them into ``/etc/confluent`` ::
cp /etc/xcat/cert/server-key.pem /etc/confluent/privkey.pem
cp /etc/xcat/cert/server-cert.pem /etc/confluent/srvcert.pem
The user and password may alternatively be provided via environment variables: ::
@ -55,4 +55,4 @@ If you want to run a confluent command against another host, could set the CONFL
CONFLUENT_HOST=<remote_ip>
export CONFLUENT_HOST
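For example, to open a confetty session against a remote confluent server in one line (the address is a placeholder): ::

    CONFLUENT_HOST=10.2.5.3 /opt/confluent/bin/confetty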

View File

@ -4,4 +4,4 @@ confluent-client
.. toctree::
:maxdepth: 2
confluent_client.rst

View File

@ -1,9 +1,9 @@
Confluent
=========
Confluent is a new codebase with a few goals in mind:
* Augment xCAT 2.X series
* Potentially serve in place of xCAT-server for the next generation of xCAT
**Disclaimer:** *Confluent code in conjunction with xCAT 2.X is currently BETA, use at your own risk*

View File

@ -1,8 +1,8 @@
Getting Started
===============
Confluent is intended to be used in conjunction with xCAT.
The following documentation assumes that xCAT is already installed and configured on the management node.
Download confluent
@ -20,7 +20,7 @@ To build from source, ensure your machine has the correct development packages t
cd confluent/confluent_client ; ./buildrpm ; cd -
Install
=======
dependency
@ -59,7 +59,7 @@ To start confluent::
service confluent start
To stop confluent::
service confluent stop
If you want confluent daemon to start automatically at bootup, add confluent service to ``chkconfig``::
@ -113,11 +113,11 @@ Configure the httpd configuration for confluent-api by creating a ``confluent.co
<Location /confluent-api>
ProxyPass http://10.2.5.3:4005
</Location>
# restart httpd
service httpd restart
Now point your browser to: ``http://<server ip>:<port>`` and log in with the non-root user and password created above.
Confluent consoles
==================

View File

@ -1,8 +1,8 @@
confluent-server
================
.. toctree::
:maxdepth: 2
confluent_server.rst

View File

@ -36,7 +36,7 @@ Create Configuration File
'''''''''''''''''''''''''
Define configuration file ``docker-registry`` under ``/docker-registry/`` folder as below. ::
#!/bin/bash
docker_command=$1
@ -84,7 +84,7 @@ Start registry service: ::
Method 2: Managing Docker Registry with Compose
```````````````````````````````````````````````
Docker Compose is a tool for defining and running Docker applications. It can help set up the registry.
Install Docker Compose
''''''''''''''''''''''
@ -147,10 +147,10 @@ List Available Images in Registry
`````````````````````````````````````
::
curl -k https://domainname:5000/v2/_catalog
Pull Images from Registry
`````````````````````````
Just use the "tag" image name, which includes the domain name, port, and image name. ::
docker pull domainname:5000/imagename
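To publish a local image to this registry, the standard Docker workflow applies; a minimal sketch, with ``imagename`` as a placeholder: ::

    docker tag imagename domainname:5000/imagename
    docker push domainname:5000/imagename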

View File

@ -16,7 +16,7 @@ Overview
:align: right
**Compose** is a native tool shipped by Docker to define and run applications in Docker containers. You use a Compose file to configure your applications/services. Then, using a single command, you create and start all the services from your configuration.
**Compose** is a native tool shipped by Docker to define and run applications in Docker containers. You use a Compose file to configure your applications/services. Then, using a single command, you create and start all the services from your configuration.
@ -24,10 +24,10 @@ By pulling xCAT Docker image and running xCAT Docker image in a container, you g
xCAT Docker images
------------------
xCAT ships 2 Docker images for Docker hosts with different architectures:
* "xcat/xcat-ubuntu-x86_64": run on x86_64 Docker host
* "xcat/xcat-ubuntu-ppc64le": run on ppc64le Docker host
* "xcat/xcat-ubuntu-ppc64le": run on ppc64le Docker host
Each of the xCAT Docker images above has 3 tags corresponding to different xCAT release inside Docker image:
@ -44,7 +44,7 @@ Run xCAT in Docker
Each container with xCAT Docker image running inside is a xCAT management node, the container connects to the compute nodes and hardware control points in the cluster via "bridge" network on the Docker host. Generally, a xCAT container should connect to 2 types of networks( the 2 types of networks might be one network in some cluster):
* "mgtnet": Management network, the network used by the Management Node to install operating systems and manage the nodes. The Management Node and in-band Network Interface Card (NIC) of the nodes are connected to this network. A bridge "mgtbr" will be created and attached to the network interface facing the compute nodes on Docker host
* "svcnet": Service network, the network used by the Management Node to control the nodes using out-of-band management using the Service Processor. A bridge "svcbr" will be created and attached to the network interface facing the hardware control points
* "svcnet": Service network, the network used by the Management Node to control the nodes using out-of-band management using the Service Processor. A bridge "svcbr" will be created and attached to the network interface facing the hardware control points
You are required to determine and specify some necessary information, so that xCAT is well configured and running when the container is started. This includes:
@ -52,7 +52,7 @@ You are required to determine and specify some necessary information, so that xC
* network information: the network configuration of the xCAT container
* cluster information: the domain of the cluster
The information can be specified in 2 ways to run xCAT container:
* in options and arguments of docker commands such as ``docker network create`` or ``docker run``
* in the "docker-compose.yml", which contains all the configuration to start xCAT containers with Compose. This is the recommended way to start xCAT container.
@ -66,7 +66,7 @@ When xCAT Docker container is started, you can access it with ``sudo docker atta
.. toctree::
:maxdepth: 2
setup_docker_host.rst
run_xcat_in_docker_compose.rst
run_xcat_in_docker_native.rst
@ -77,21 +77,21 @@ Work with xCAT
Once xCAT Docker container is running, you can use xCAT with the shell inside the container. Since the ssh service has been enabled on the Docker container startup, you can connect to the container via ssh. The default root password is "cluster".
Once you attach or ssh to the container, you will find that xCAT is running and configured; you can play with xCAT and manage your cluster now.
Currently, xCAT can only generate diskless osimages of Linux distributions with the same OS version and architecture as the xCAT MN. If you need to provision diskless osimages besides ubuntu x86_64 with xCAT running in Docker, you can use ``imgexport`` and ``imgimport`` to import diskless osimages generated before.
If you start up the xCAT Docker container by following the steps described in the sections above strictly, without specifying "--dns=IP_ADDRESS...", "--dns-search=DOMAIN...", or "--dns-opt=OPTION..." options, Docker uses the /etc/resolv.conf of the host machine (where the docker daemon runs). For any DNS problem inside the container, make sure the DNS server on the Docker host works well.
Save and Restore xCAT data
----------------------------
According to the policy of Docker, Docker image should only be the service deployment unit, it is not recommended to save data in Docker image. Docker uses "Data Volume" to save persistent data inside container, which can be simply taken as a shared directory between Docker host and Docker container.
For dockerized xCAT, there are 3 volumes recommended to save and restore xCAT user data.
* "/install": save the osimage resources under "/install" directory
* "/var/log/xcat/": save xCAT logs
* "/install": save the osimage resources under "/install" directory
* "/var/log/xcat/": save xCAT logs
* "/.dbbackup": save and restore xCAT DB tables. You can save the xCAT DB tables with ``dumpxCATdb -p /.dbbackup/`` inside container and xCAT will restore the tables on the container start up.

View File

@ -3,14 +3,14 @@ Run xCAT in Docker with Compose (Recommended)
An example configuration in the documentation
---------------------------------------------
To demonstrate the steps to run xCAT in a Docker container, take a cluster with the following configuration as an example ::
The name of the docker container running xCAT: xcatmn
The hostname of container xcatmn: xcatmn
The dns domain of the cluster: clusters.com
The management network object: mgtnet
The network bridge of management network on Docker host: mgtbr
@ -24,7 +24,7 @@ To demonstrate the steps to run xCAT in a Docker container, take a cluster with
The IP address of eno2 on Docker host: 192.168.0.1/8
The IP address of xCAT container in service network: 192.168.0.101
Install Compose on Docker host
------------------------------
@ -34,27 +34,27 @@ Compose v1.7.0 or above should be installed on Docker host: ::
chmod +x /usr/local/bin/docker-compose
Customize docker-compose file
-----------------------------
xCAT ships a docker-compose template `docker-compose.yml <https://github.com/immarvin/xcat-docker/blob/master/docker-compose.yml>`_, which is a self-description file including all the configurations to run xCAT in container. You can make up your compose file based on it if you are familiar with `Compose file <https://docs.docker.com/compose/compose-file/>`_ , otherwise, you can simply customize it with the following steps:
1. Specify the xCAT Docker image
::
image: [xCAT Docker image name]:[tag]
specify the name and tag of xCAT Docker image, for example "xcat/xcat-ubuntu-x86_64:2.11"
2. Specify the cluster domain name
::
extra_hosts:
- "xcatmn.[cluster domain name] xcatmn:[Container's IP address in management network]"
specify the cluster domain name, i.e., "site.domain" on xCAT Management Node, for example "clusters.com", and the IP address of xCAT Docker container in the management network, such as "10.5.107.101"
3. Specify the IP address of xCAT container in service network and management network
@ -66,7 +66,7 @@ specify the cluster domain name,i.e, "site.domain" on xCAT Management Node, for
ipv4_address : [Container's IP address in service network]
mgtnet:
ipv4_address : [Container's IP address in management network]
specify the IP address of Docker container in service network and management network. If the "svcnet" is the same as "mgtnet", the 2 "svcnet" lines should be commented out.
@ -75,30 +75,30 @@ specify the IP address of Docker container in service network and management net
::
networks:
#management network, attached to the network interface on Docker host
#facing the nodes to provision
mgtnet:
  driver: "bridge"
  driver_opts:
    com.docker.network.bridge.name: "mgtbr"
  ipam:
    config:
      - subnet: [subnet of mgtbr in CIDR]
        gateway: [IP address of mgtbr]
#service network, attached to the network interface on
#Docker host facing the bmc network
svcnet:
  driver: "bridge"
  driver_opts:
    com.docker.network.bridge.name: "svcbr"
  ipam:
    config:
      - subnet: [subnet of svcbr in CIDR]
        gateway: [IP address of svcbr]
specify the network configuration of the bridge networks "mgtnet" and "svcnet". The network configuration of the bridge networks should be the same as the network interfaces attached to the bridges. The "mgtnet" and "svcnet" might be the same network in some clusters; in that case, you can ignore the lines for "svcnet".
5. Specify the Data Volumes for xCAT Docker container
@ -114,16 +114,16 @@ specify the network configuration of bridge networks "mgtnet" and "svcnet", the
#"dumpxCATdb -p /.dbbackup" should be run manually to save xCAT DB inside container
- [The directory on Docker host mounted to save xCAT DB inside container]:/.dbbackup
#the "/.logs" value is used to keep xCAT logs
#the xCAT logs will be kept if specified
- [The directory on Docker host to save xCAT logs inside container]:/var/log/xcat/
specify the volumes of the xCAT container used to save and restore xCAT data
Start xCAT Docker container with Compose
----------------------------------------
After the "docker-compose.yml" is ready, the xCAT Docker container can be started with [1]_ ::
docker-compose -f "docker-compose.yml" up -d; \
ifconfig eno1 0.0.0.0; \
brctl addif mgtbr eno1; \
@ -139,7 +139,7 @@ To remove the container, you can run ::
ifup eno1
To update the xCAT Docker image, you can run ::
docker-compose -f "docker-compose.yml" pull
@ -151,6 +151,6 @@ Known Issues
When you start up xCAT Docker container, you might see an error message at the end of the output like ::
Couldn't connect to Docker daemon at http+unix://var/run/docker.sock - is it running? If it's at a non-standard location, specify the URL with the DOCKER_HOST environment variable.
You can ignore it; the container is already running. It is a Docker bug `#1214 <https://github.com/docker/compose/issues/1214>`_

View File

@ -9,7 +9,7 @@ Now xCAT ships xCAT Docker images(x86_64 and ppc64le) on the `DockerHub <https:/
To pull the latest xCAT Docker image for x86_64, run ::
sudo docker pull xcat/xcat-ubuntu-x86_64
On success, you will see the pulled Docker image on Docker host ::
@ -19,12 +19,12 @@ On success, you will see the pulled Docker image on Docker host ::
An example configuration in the documentation
---------------------------------------------
To demonstrate the steps to run xCAT in a Docker container, take a cluster with the following configuration as an example ::
Docker host: dockerhost1
The name of the docker container running xCAT: xcatmn
The hostname of container xcatmn: xcatmn
The management network object: mgtnet
@ -33,15 +33,15 @@ To demonstrate the steps to run xCAT in a Docker container, take a cluster with
The IP address of eno1 on Docker host: 10.5.107.1/8
The IP address of xCAT container in management network: 10.5.107.101
The dns domain of the cluster: clusters.com
Create a customized Docker network on the Docker host
-----------------------------------------------------
**Docker Networks** provide complete isolation for containers, which gives you control over the networks your containers run on. To run xCAT in Docker, you should create a customized bridge network according to the cluster network plan, instead of using the default bridge network created on Docker installation.
As an example, we create a customized bridge network "mgtbr" which is attached to the network interface "eno1" facing the compute nodes and inherits the network configuration of "eno1". Since the commands to create the network will break the network connection on "eno1", you'd better run the commands in one line instead of running them separately ::
sudo docker network create --driver=bridge --gateway=10.5.107.1 --subnet=10.5.107.0/8 -o "com.docker.network.bridge.name"="mgtbr" mgtnet; \
ifconfig eno1 0.0.0.0; \
@ -51,7 +51,7 @@ As an example, we create a customized bridge network "mgtbr" which is attached t
* ``--driver=bridge`` specify the network driver to be "bridge"
* ``--gateway=10.5.107.1`` specify the network gateway to be the IP address of "eno1" on Docker host. which will also be the IP address of network bridge "mgtbr"
* ``--subnet=10.5.107.0/8`` speify the subnet in CIDR format to be the subnet of "eno1"
* ``"com.docker.network.bridge.name"="mgtbr"`` specify the bridge name of the management network
* ``ifconfig eno1 0.0.0.0`` delete the IP address of "eno1"
* ``brctl addif mgtbr eno1`` attach the bridge "mgtbr" to network interface "eno1"
* ``ip link set mgtbr up`` change the state of "mgtbr" to UP
@ -68,7 +68,7 @@ Now run the xCAT Docker container with the Docker image "xcat/xcat-ubuntu-x86_64
* use ``--privileged=true`` to give extended privileges to this container
* use ``--hostname`` to specify the hostname of the container, which is available inside the container
* use ``--name`` to assign a name to the container, this name can be used to manipulate the container on Docker host
* use ``--add-host="xcatmn.clusters.com xcatmn:10.5.107.101"`` to write the ``/etc/hosts`` entries of Docker container inside container. Since xCAT uses the FQDN(Fully Qualified Domain Name) to determine the cluster domain on startup, make sure the format is "<FQDN> <hostname>: <IP Address>", otherwise, you need to set the cluster domain with ``chdef -t site -o clustersite domain="clusters.com"`` inside the container manually
* use ``--volume /docker/xcatdata/:/install`` to mount a pre-created "/docker/xcatdata" directory on Docker host to "/install" directory inside container as a data volume. This is optional, it is mandatory if you want to backup and restore xCAT data.
* use ``--net=mgtnet`` to connect the container to the Docker network "mgtnet"
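Assembled from the options above, a minimal sketch of the full command; the detached flag (``-d``), the ``--ip`` value and the trailing image name are assumptions based on the example configuration, not taken from this excerpt: ::

    sudo docker run -d --privileged=true --hostname=xcatmn --name=xcatmn \
         --add-host="xcatmn.clusters.com xcatmn:10.5.107.101" \
         --volume /docker/xcatdata/:/install \
         --net=mgtnet --ip=10.5.107.101 \
         xcat/xcat-ubuntu-x86_64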

View File

@ -4,14 +4,14 @@ Setup Docker host
Install Docker Engine
---------------------
The Docker host to run xCAT Docker image should be a bare metal or virtual server with Docker v1.10 or above installed. For the details on system requirements and Docker installation, refer to `Docker Installation Docs <https://docs.docker.com/engine/installation/>`_.
.. note:: Docker images can only run on Docker hosts with the same architecture. Since xCAT only ships x86_64 and ppc64le Docker images, running xCAT in Docker requires x86_64 or ppc64 Docker Hosts.
Shutdown the SELinux/Apparmor on Docker host
--------------------------------------------
If the SELinux or Apparmor on Docker host is enabled, the services/applications inside Docker Container might be confined. To run xCAT in Docker container, SELinux and Apparmor on the Docker host must be disabled.
SELinux can be disabled with: ::

View File

@ -4,6 +4,6 @@ Docker
.. toctree::
:maxdepth: 2
dockerized_xcat/dockerized_xcat.rst
lifecycle_management.rst
docker_registry.rst

View File

@ -3,9 +3,9 @@ Docker life-cycle management in xCAT
The Docker linux container technology is currently very popular. xCAT can help managing Docker containers. xCAT, as a system management tool has the natural advantage for supporting multiple operating systems, multiple architectures and large scale clusters.
This document describes how to use xCAT for docker management, from Docker Host setup to docker container operations.
.. note:: This document was verified with:
* Docker Version 1.10, 1.11
* Docker API version 1.22
@ -19,7 +19,7 @@ This document describes how to use xCAT for docker management, from Docker Host
Setting up Docker Host
----------------------
The **Docker Host** is the bare metal server or virtual machine where Docker containers can run. It will be called *dockerhost* in the following sections.
The *dockerhost* at a minimum must provide the following:
@ -28,12 +28,12 @@ The *dockerhost* at a minimum must provide the following:
Preparing osimage for docker host
`````````````````````````````````
The osimage represents the image of the Operating System which will be deployed on the dockerhost.
Copy files out from DVDs/ISOs and generate
""""""""""""""""""""""""""""""""""""""""""
**[ubuntu x86_64]** ::
copycds ubuntu-xxx-server-amd64.iso
**[ubuntu16.04 ppc64el]** ::
@ -52,31 +52,31 @@ The pkglist file should contain the following: ::
nfs-common
snmpd
bridge-utils
The otherpkglist file should contain the following:
**[ubuntu x86_64]** ::
# cat /install/custom/ubuntu/ubuntu_docker.pkglist
docker-engine
**[ubuntu16.04 ppc64el]**
At the time of this writing (February 2016), docker package is not available for **ppc64el** architecture from docker.org. You can follow instructions below on how to manually download and install it.
* Download docker engine for ppc64el:
::
wget http://launchpadlibrarian.net/251622081/docker.io_1.10.3-0ubuntu4_ppc64el.deb -O /install/docker_ppc64el/docker.io_1.10.3-0ubuntu4_ppc64el.deb
* Configure **otherpkgdir** like this:
::
otherpkgdir=/install/docker_ppc64el
* The **otherpkglist** file should be:
::
@ -87,7 +87,7 @@ Create the osimage for dockerhost
"""""""""""""""""""""""""""""""""
The osimage for dockerhost will be like this:
**[ubuntu x86_64]** ::
# lsdef -t osimage ub14.04.03-x86_64-dockerhost
Object name: ub14.04.03-x86_64-dockerhost
@ -125,9 +125,9 @@ Currently, a customer defined network object is needed when create a docker cont
chdef host01 -p postbootscripts="setupdockerhost <netobj_name>=<subnet>/<netmask>@<gateway>[:nicname]"
* netobj_name: the network object to be created, it will be used in *dockernics* when creating docker container
* subnet/netmask@gateway: the network which the IP address of docker container running on the docker host must be located in. If *nicname* is specified, the *subnet/netmask* must be the subnet of the nic *nicname* located in. And *gateway* shall be the IP address of the nic *nicname*.
* nicname: the physical nic name which will be attached to the network object
For example, a network object *mynet0* with subnet *10.0.0.0/16* and gateway *10.0.101.1* on nic *eth0* can be created with the command: ::
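A hedged sketch of that command, assuming the ``<netmask>`` field accepts the prefix length shown above (it may instead expect a dotted netmask such as 255.255.0.0): ::

    chdef host01 -p postbootscripts="setupdockerhost mynet0=10.0.0.0/16@10.0.101.1:eth0"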
@ -170,7 +170,7 @@ Create docker instance
* dockerflag - A JSON string which will be used as parameters to create a docker. Reference `docker API v1.22 <https://docs.docker.com/engine/reference/api/docker_remote_api_v1.22/>`_ for more information about which parameters can be specified for "dockerflag".
To create the docker instance *host01c01* with image *ubuntu* and command */bin/bash*, use: ::
mkdocker host01c01 image=ubuntu command=/bin/bash dockerflag="{\"AttachStdin\":true,\"AttachStdout\":true,\"AttachStderr\":true,\"OpenStdin\":true}"
Remove docker instance
@ -242,13 +242,13 @@ If things go wrong:
ip addr show dev <nicname>
* Run **ps -ef | grep docker** to verify docker engine is running with configured options. It should look something like
::
root 3703 1 0 Apr15 ? 00:12:28 /usr/bin/docker daemon -H unix:///var/run/docker.sock -H tcp://host01:2375 --tls --tlscacert=/root/.docker/ca-cert.pem --tlscert=/root/.docker/dockerhost-cert.pem --tlskey=/root/.docker/dockerhost-cert.pem --tlsverify=true --raw-logs
If the output is missing some options, verify that file **/lib/systemd/system/docker.service** contains the following lines
::

View File

@ -78,7 +78,7 @@ Preparing for Using a DNS
If you are choosing any of the options for using DNS, follow these steps:
NOTE: This documentation only applies to the xCAT makedns command using the ddns.pm plugin. The ddns.pm plugin is based on named9/bind9, and can not support named8/bind8 due to syntax difference.
* Set the **nameservers** and **forwarders** attributes in the xCAT site table. The **nameservers** attribute identifies the DNS server hostname/ip that the nodes point to in their **/etc/resolv.conf** files. The forwarders attribute are the DNS server's ip that can resolve external hostnames. If you are running a DNS on the xCAT MN, it will use the forwarders DNS server to resolve any hostnames it can't.
@ -88,7 +88,7 @@ For example: ::
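The example block itself is not shown in this hunk; a hedged sketch with placeholder addresses: ::

    chdef -t site nameservers=10.0.0.1 forwarders=192.168.1.1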
* Create an /etc/resolv.conf file on the management node
Edit **/etc/resolv.conf** to contain the cluster domain value you set in the site table's **domain** attribute above, and to point to the same DNS server you will be using for your nodes (if you are using DNS).
Option #1: Running DNS on Your Management Node
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -132,7 +132,7 @@ If you already have a DNS on your site network and you want to use it to solve t
* Set the site **nameservers** value to the IP address of the external name server. ::
chdef -t site nameservers=<external dns IP>
* Set the correct information of external dns into the /etc/resolv.conf on your xCAT MN.
The **domain** and **nameservers** values must be set correctly in **/etc/resolv.conf**. Which should have the same values with the ones your set in the site table.
@ -159,7 +159,7 @@ The **domain** and **nameservers** values must be set correctly in **/etc/resolv
file "db.192.168.1";
};
* To update the name resolution entries from ``/etc/hosts`` or hosts table of xCAT MN to external DNS, run ``makedns -e``
Alternatively, you can set site.externaldns=1 and run ``makedns``
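A minimal sketch of that alternative (assuming the default ``clustersite`` object in the site table): ::

  chdef -t site -o clustersite externaldns=1
  makedns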
@ -191,7 +191,7 @@ The configurations are described below for the two options, note the differences
Once **/etc/hosts** is populated with all of the nodes' hostnames and IP addresses, configure DNS on the management node and start it: ::
makedns -n
When the **/etc/resolv.conf** files for the compute nodes are created, the value of the **nameserver** in /etc/resolv.conf is taken from **site.nameservers**, or from **networks.nameservers** if it is specified.
@ -4,4 +4,4 @@ Domain Name Resolution
.. toctree::
:maxdepth: 2
domain_name_resolution.rst
@ -17,4 +17,4 @@ To enable ``goconserver``, execute the following steps:
makegocons
The new console logs will start logging to ``/var/log/consoles/<node>.log``
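To follow a node's console output from the Management Node, for example (``node01`` is a placeholder node name): ::

  tail -f /var/log/consoles/node01.log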
@ -1,7 +1,7 @@
GPU Management and Monitoring
=============================
The ``nvidia-smi`` command provided by NVIDIA can be used to manage and monitor GPU enabled Compute Nodes. In conjunction with the xCAT ``xdsh`` command, you can easily manage and monitor the entire set of GPU enabled Compute Nodes remotely from the Management Node.
Example: ::
@ -14,7 +14,7 @@ Example: ::
Management
----------
Some useful ``nvidia-smi`` example commands for management.
* Set persistence mode. When persistence mode is enabled, the NVIDIA driver remains loaded even when there are no active clients (DISABLED by default)::
@ -29,11 +29,11 @@ Some useful ``nvidia-smi`` example commands for management.
nvidia-smi -i 0 -p 0/1
* Set MODE for compute applications, query with --query-gpu=compute_mode::
nvidia-smi -i 0 -c 0/1/2/3
* Trigger reset of the GPU ::
nvidia-smi -i 0 -r
@ -48,7 +48,7 @@ Some useful ``nvidia-smi`` example commands for management.
Monitoring
----------
Some useful ``nvidia-smi`` example commands for monitoring.
* The number of NVIDIA GPUs in the system ::
@ -75,7 +75,7 @@ Some useful ``nvidia-smi`` example commands for monitoring.
nvidia-smi -i 0 --query-gpu=compute_mode --format=csv,noheader
* Percent of time over the past sample period during which one or more kernels was executing on the GPU::
nvidia-smi -i 0 --query-gpu=utilization.gpu --format=csv,noheader
* Total errors detected across entire chip. Sum of device_memory, register_file, l1_cache, l2_cache and texture_memory ::
@ -86,14 +86,14 @@ Some useful ``nvidia-smi`` example commands for monitoring.
nvidia-smi -i 0 --query-gpu=temperature.gpu --format=csv,noheader
* The ECC mode that the GPU is currently operating under::
nvidia-smi -i 0 --query-gpu=ecc.mode.current --format=csv,noheader
* The power management status::
nvidia-smi -i 0 --query-gpu=power.management --format=csv,noheader
* The last measured power draw for the entire board, in watts::
nvidia-smi -i 0 --query-gpu=power.draw --format=csv,noheader
@ -16,7 +16,7 @@ Add this script to your node object using the ``chdef`` command: ::
Setting GPU Configurations
--------------------------
NVIDIA allows for changing GPU attributes using the ``nvidia-smi`` commands. These settings do not persist when a compute node is rebooted. One way to set these attributes is to use an xCAT postscript to set the values every time the node is rebooted.
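A rough sketch of such a postscript (the script name ``setgpuattrs`` is hypothetical; the settings shown are only examples): ::

  #!/bin/bash
  # Hypothetical xCAT postscript: re-apply non-persistent GPU settings at boot
  nvidia-smi -pm 1     # enable persistence mode
  nvidia-smi -pl 175   # set the power limit to 175W

After placing the script in ``/install/postscripts``, it could be added to the nodes with ``chdef -t node -o <noderange> -p postscripts=setgpuattrs``.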
* Set the power limit to 175W: ::
@ -12,7 +12,7 @@ xCAT provides a sample package list files for CUDA. You can find them at:
* ``/opt/xcat/share/xcat/install/ubuntu/cudafull.ubuntu14.04.3.ppc64el.pkglist``
* ``/opt/xcat/share/xcat/install/ubuntu/cudaruntime.ubuntu14.04.3.ppc64el.pkglist``
**[diskful note]**: There is a requirement to reboot the machine after the CUDA drivers are installed. To satisfy this requirement, the CUDA software is installed in the ``pkglist`` attribute of the osimage definition where the reboot happens after the Operating System is installed.
cudafull
^^^^^^^^
@ -21,13 +21,13 @@ cudafull
lsdef -t osimage -z ubuntu14.04.3-ppc64el-install-compute \
| sed 's/install-compute:/install-cudafull:/' \
| mkdef -z
#. Add the CUDA repo created in the previous step to the ``pkgdir`` attribute.
If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the pkgdir::
chdef -t osimage -o ubuntu14.04.3-ppc64el-install-cudafull \
-p pkgdir=http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local
@ -45,7 +45,7 @@ cudaruntime
lsdef -t osimage -z ubuntu14.04.3-ppc64el-install-compute \
| sed 's/install-compute:/install-cudaruntime:/' \
| mkdef -z
#. Add the CUDA repo created in the previous step to the ``pkgdir`` attribute:
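Following the same pattern as the ``cudafull`` image above, a sketch (again assuming the Management Node IP is 10.0.0.1): ::

  chdef -t osimage -o ubuntu14.04.3-ppc64el-install-cudaruntime \
      -p pkgdir=http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local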
@ -64,14 +64,14 @@ cudaruntime
Diskless images
---------------
The following examples will create diskless images for ``cudafull`` and ``cudaruntime``. The osimage definitions will be created from the base ``ubuntu14.04.3-ppc64el-netboot-compute`` osimage.
xCAT provides a sample package list files for CUDA. You can find them at:
* ``/opt/xcat/share/xcat/netboot/ubuntu/cudafull.ubuntu14.04.3.ppc64el.pkglist``
* ``/opt/xcat/share/xcat/netboot/ubuntu/cudaruntime.ubuntu14.04.3.ppc64el.pkglist``
**[diskless note]**: For diskless images, the requirement for rebooting the machine is not applicable because the image is loaded on each reboot. The install of the CUDA packages is required to be done in the ``otherpkglist`` **NOT** the ``pkglist``.
cudafull
^^^^^^^^
@ -80,9 +80,9 @@ cudafull
lsdef -t osimage -z ubuntu14.04.3-ppc64el-netboot-compute \
| sed 's/netboot-compute:/netboot-cudafull:/' \
| mkdef -z
#. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute.
If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the ``otherpkgdir``::
@ -98,7 +98,7 @@ cudafull
#. Verify that ``acpid`` is installed on the Management Node or on the Ubuntu host where you are generating the diskless image: ::
apt-get install -y acpid
#. Generate the image: ::
@ -115,9 +115,9 @@ cudaruntime
lsdef -t osimage -z ubuntu14.04.3-ppc64el-netboot-compute \
| sed 's/netboot-compute:/netboot-cudaruntime:/' \
| mkdef -z
#. Add the CUDA repo created in the previous step to the ``otherpkgdir`` attribute.
If your Management Node IP is 10.0.0.1, the URL for the repo would be ``http://10.0.0.1/install/cuda-repo/ppc64el/var/cuda-repo-7-5-local``, add it to the ``otherpkgdir``::
@ -133,7 +133,7 @@ cudaruntime
#. Verify that ``acpid`` is installed on the Management Node or on the Ubuntu host where you are generating the diskless image: ::
apt-get install -y acpid
#. Generate the image: ::
@ -1,7 +1,7 @@
Create CUDA software repository
===============================
The NVIDIA CUDA Toolkit is available to download at http://developer.nvidia.com/cuda-downloads.
Download the toolkit and prepare the software repository on the xCAT Management Node to serve the NVIDIA CUDA files.
@ -3,9 +3,9 @@ Install NVIDIA Management Library (optional)
See https://developer.nvidia.com/nvidia-management-library-nvml for more information.
The .run file can be downloaded from NVIDIA's website and placed into the ``/install/postscripts`` directory on the Management Node.
To enable installation of the management library after the node is installed, add the .run file to the ``postbootscripts`` attribute for the nodes: ::
# ensure the .run file has execute permission
chmod +x /install/postscripts/<gpu_deployment_kit>.run
@ -9,14 +9,14 @@ Local
A local package repo will contain all of the CUDA packages. Extract the CUDA packages into ``/install/cuda-repo/ppc64el``: ::
# For CUDA toolkit: /root/cuda-repo-ubuntu1404-7-5-local_7.5-18_ppc64el.deb
# Create the repo directory under xCAT /install dir
mkdir -p /install/cuda-repo/ppc64el
# extract the package
dpkg -x /root/cuda-repo-ubuntu1404-7-5-local_7.5-18_ppc64el.deb /install/cuda-repo/ppc64el
Network
-------
@ -31,7 +31,7 @@ The ``sources.list`` entry may look similar to: ::
Authorize the CUDA repo
-----------------------
In order to access the CUDA repository you must import the CUDA GPGKEY into the ``apt_key`` trust list. xCAT provides a sample postscript ``/install/postscripts/addcudakey`` to help with this task: ::
chdef -t node -o <noderange> -p postscripts=addcudakey
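If the nodes are already up, the postscript can also be pushed out immediately with ``updatenode``: ::

  updatenode <noderange> -P addcudakey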
@ -4,10 +4,10 @@ Verify CUDA Installation
**The following verification steps only apply to the ``cudafull`` installations.**
#. Verify driver version by looking at: ``/proc/driver/nvidia/version``: ::
# cat /proc/driver/nvidia/version
NVRM version: NVIDIA UNIX ppc64le Kernel Module 352.39 Fri Aug 14 17:10:41 PDT 2015
GCC version: gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC)
#. Verify the CUDA Toolkit version ::
@ -19,7 +19,7 @@ Verify CUDA Installation
#. Verify running CUDA GPU jobs by compiling the samples and executing the ``deviceQuery`` or ``bandwidthTest`` programs.
* Compile the samples:
**[RHEL]:** ::
@ -32,13 +32,13 @@ Verify CUDA Installation
cd ~/
apt-get install cuda-samples-7-0 -y
cd /usr/local/cuda-7.0/samples
make
* Run the ``deviceQuery`` sample: ::
# ./bin/ppc64le/linux/release/deviceQuery
./deviceQuery Starting...
CUDA Device Query (Runtime API) version (CUDART static linking)
Detected 4 CUDA Capable device(s)
@ -54,9 +54,9 @@ Verify CUDA Installation
............
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 7.5, CUDA Runtime Version = 7.5, NumDevs = 4, Device0 = Tesla K80, Device1 = Tesla K80, Device2 = Tesla K80, Device3 = Tesla K80
Result = PASS
* Run the ``bandwidthTest`` sample: ::
# ./bin/ppc64le/linux/release/bandwidthTest
[CUDA Bandwidth Test] - Starting...
Running on...
@ -75,6 +75,6 @@ Verify CUDA Installation
Transfer Size (Bytes) Bandwidth(MB/s)
33554432 141485.3
Result = PASS
NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.
@ -1,42 +1,42 @@
Configuration considerations
============================
xCAT provides several configuration options for the HAMN. You can select one of the options based on your failover requirements and hardware configuration; the following considerations should help you make the decision.
Data synchronization mechanism
------------------------------
Data synchronization is important for any high availability configuration. When an xCAT management node failover occurs, the xCAT data needs to be exactly the same as before the failover, and some of the operating system configuration should also be synchronized between the two management nodes. To be specific, the following data should be synchronized between the two management nodes to make the xCAT HAMN work:
* xCAT database
* xCAT configuration files, like ``/etc/xcat``, ``~/.xcat``, ``/opt/xcat``
* The configuration files for the services that are required by xCAT, like named, DHCP, apache, nfs, ssh, etc.
* The operating systems images repository and users customization data repository, the ``/install`` directory contains these repositories in most cases.
There are many ways to synchronize data, but considering the specific xCAT HAMN requirements, only a few of the data synchronization options are practical for xCAT HAMN.
**1\. Move physical disks between the two management nodes**: if we could physically move the hard disks from the failed management node to the backup management node, and bring up the backup management node, then both the operating system and xCAT data will be identical between the new management node and the failed management node. RAID1 or disk mirroring could be used to avoid the disk being a single point of failure.
**2\. Shared data**: the two management nodes use a single copy of the xCAT data, and no matter which management node is the primary MN, the cluster management capability runs on top of that single data copy. The access to the data could be done through various ways like shared storage, NAS, NFS, samba, etc. Based on the protocol being used, the data might be accessible only on one management node at a time or be accessible on both management nodes in parallel. If the data could only be accessed from one management node, the failover process needs to take care of the data access transition; if the data could be accessed on both management nodes, the failover does not need to consider the data access transition, which usually means the failover process could be faster.
Warning: Running a database over a network file system has many potential problems and is not practical; however, most database systems provide a replication feature that can be used to synchronize the database between the two management nodes.
**3\. Mirroring**: each management node has its own copy of the xCAT data, and the two copies of data are synchronized through a mirroring mechanism. DRBD is widely used in high availability configuration scenarios to provide data replication by mirroring a whole block device via network. If we put all the important data for xCAT onto the DRBD devices, then it could assure the data is synchronized between the two management nodes. Some parallel file systems also provide the capability to mirror data through the network.
Manual vs. Automatic Failover
-----------------------------
When the primary management node fails, the backup management node could automatically take over, or the administrator has to perform some manual procedure to finish the failover. In general, the automatic failover takes less time to detect the failure and perform the failover, compared with the manual failover, but the automatic failover requires a more complex configuration. We could not say the automatic failover is better than the manual failover in all cases; the following factors should be considered when deciding between manual failover and automatic failover:
**1\. How long can the cluster survive if the management node is down?**
If the cluster could not survive for more than several minutes, then the automatic failover might be the only option; if the compute nodes could run without the management node, at least for a while, then the manual failover could be an option.
From xCAT perspective, if the management node needs to provide network services like DHCP, named, ntp or nfs to the compute nodes, then the cluster probably could not survive too long if the management node is down; if the management node only performs hardware control and some other management capabilities, then the failed management node may not cause too much trouble for the cluster. xCAT provides various options for configuring if the compute nodes rely on the network services on the management node.
**2\. Configuration complexity**
The configuration for the high availability applications is usually complex; it may take a long time to configure, debug, and stabilize the high availability configuration.
**3\. Maintenance effort**
@ -45,7 +45,7 @@ The automatic failover brings in several high availability applications, after t
Configuration Options
=====================
The combinations of data synchronization mechanism and manual/automatic failover indicate different HAMN configuration options; the table below lists all the combinations (the bold numbers are the combinations xCAT has documented and tested):
+-------------------+-------------------------+-----------------+--------------+
|# | **Move physical disks** | **Shared data** | **Mirroring**|
@ -55,14 +55,14 @@ The combinations of data synchronization mechanism and manual/automatic failover
|Automatic Failover | 4 | **5** | **6** |
+-------------------+-------------------------+-----------------+--------------+
Option 1, :ref:`setup_ha_mgmt_node_with_raid1_and_disks_move`
Option 2, :ref:`setup_ha_mgmt_node_with_shared_data`
Option 3, it is doable but not currently supported.
Option 4, it is not practical.
Option 5, :ref:`setup_xcat_high_available_management_node_with_nfs`
Option 6, :ref:`setup_ha_mgmt_node_with_drbd_pacemaker_corosync`
@ -6,7 +6,7 @@ The xCAT management node plays an important role in the cluster, if the manageme
The goal of the HAMN (High Availability Management Node) configuration is, when the primary xCAT management node fails, the standby management node can take over the role of the management node, either through automatic failover or through manual procedure performed by the administrator, and thus avoid long periods of time during which your cluster does not have active cluster management function available.
The following pages describe ways to configure the xCAT Management Node for High Availability.
.. toctree::
:maxdepth: 2
@ -1651,7 +1651,7 @@ which should result in the following output: ::
x3550m4n01 x3550m4n02
Pacemaker Nodes:
x3550m4n01 x3550m4n02
Resources:
Resource: ip_xCAT (class=ocf provider=heartbeat type=IPaddr2)
Attributes: ip=10.1.0.1 iflabel=xCAT cidr_netmask=22 nic=eno2
@ -1714,10 +1714,10 @@ which should result in the following output: ::
Operations: start interval=0s timeout=1m (symlinks_xCAT-start-timeout-1m)
stop interval=0s timeout=1m (symlinks_xCAT-stop-timeout-1m)
monitor interval=31s on-fail=ignore (symlinks_xCAT-monitor-interval-31s)
Stonith Devices:
Fencing Levels:
Location Constraints:
Ordering Constraints:
promote ms_drbd_xCAT then start grp_xCAT (kind:Mandatory) (id:order-ms_drbd_xCAT-grp_xCAT-mandatory)
@ -1756,7 +1756,7 @@ which should result in the following output: ::
gmetad with grp_xCAT (score:INFINITY) (id:colocation-gmetad-grp_xCAT-INFINITY)
icinga with grp_xCAT (score:INFINITY) (id:colocation-icinga-grp_xCAT-INFINITY)
ip_xCAT with grp_xCAT (score:INFINITY) (id:colocation-ip_xCAT-grp_xCAT-INFINITY)
Cluster Properties:
cluster-infrastructure: corosync
cluster-name: ucl_cluster
@ -1780,12 +1780,12 @@ And the resulting output should be the following: ::
Version: 1.1.12-a14efad
2 Nodes configured
17 Resources configured
Online: [ x3550m4n01 x3550m4n02 ]
Full list of resources:
ip_xCAT (ocf::heartbeat:IPaddr2): Started x3550m4n01
NFS_xCAT (ocf::heartbeat:nfsserver): Started x3550m4n01
apache_xCAT (ocf::heartbeat:apache): Started x3550m4n01
@ -1802,7 +1802,7 @@ And the resulting output should be the following: ::
Resource Group: grp_xCAT
fs_xCAT (ocf::heartbeat:Filesystem): Started x3550m4n01
symlinks_xCAT (ocf::tummy:drbdlinks): Started x3550m4n01
PCSD Status:
x3550m4n01: Online
x3550m4n02: Online
@ -1818,7 +1818,7 @@ Further from this, the following changes needed to be made for nfs in el7 ::
Description=RPC Pipe File System
DefaultDependencies=no
Conflicts=umount.target
[Mount]
What=sunrpc
Where=/var/lib/nfs_local/rpc_pipefs
@ -1849,9 +1849,9 @@ Further from this, the following changes needed to be made for nfs in el7 ::
-After=var-lib-nfs-rpc_pipefs.mount
+Requires=var-lib-nfs_local-rpc_pipefs.mount
+After=var-lib-nfs_local-rpc_pipefs.mount
ConditionPathExists=/etc/krb5.keytab
--- /usr/lib/systemd/system/nfs-secure.service 2015-01-23 16:30:26.000000000 +0000
+++ /etc/systemd/system/nfs-secure.service 2015-10-13 01:39:36.000000000 +0100
@ -1863,9 +1863,9 @@ Further from this, the following changes needed to be made for nfs in el7 ::
-After=var-lib-nfs-rpc_pipefs.mount
+Requires=var-lib-nfs_local-rpc_pipefs.mount
+After=var-lib-nfs_local-rpc_pipefs.mount
ConditionPathExists=/etc/krb5.keytab
--- /usr/lib/systemd/system/nfs-secure-server.service 2015-01-23 16:30:26.000000000 +0000
+++ /etc/systemd/system/nfs-secure-server.service 2015-10-13 01:39:36.000000000 +0100
@ -1878,7 +1878,7 @@ Further from this, the following changes needed to be made for nfs in el7 ::
+After=var-lib-nfs_local-rpc_pipefs.mount
PartOf=nfs-server.service
PartOf=nfs-utils.service
--- /usr/lib/systemd/system/nfs-blkmap.service 2015-01-23 16:30:26.000000000 +0000
+++ /etc/systemd/system/nfs-blkmap.service 2015-10-13 01:39:36.000000000 +0100
@ -1890,7 +1890,7 @@ Further from this, the following changes needed to be made for nfs in el7 ::
-Requires=var-lib-nfs-rpc_pipefs.mount
+After=var-lib-nfs_local-rpc_pipefs.mount
+Requires=var-lib-nfs_local-rpc_pipefs.mount
Requisite=nfs-blkmap.target
After=nfs-blkmap.target
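After editing these unit files, systemd must re-read its configuration before the changes take effect; a minimal sketch: ::

  systemctl daemon-reload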
@ -81,4 +81,4 @@ The failover procedure is simple and straightforward:
#. Boot up the standby management node
#. Verify the standby management node could now perform all the cluster management operations.
@ -54,7 +54,7 @@ The configuration procedure will be quite different based on the shared data mec
/install
~/.xcat
/<dbdirectory>
/tftpboot
.. note:: * For MySQL, the database directory is ``/var/lib/mysql``
@ -100,7 +100,7 @@ Setup xCAT on the Primary Management Node
Add the two management nodes into policy table: ::
tabdump policy
"1.2","rhmn1",,,,,,"trusted",,
"1.3","rhmn2",,,,,,"trusted",,
"1.4","rhmn",,,,,,"trusted",,
@ -109,7 +109,7 @@ Setup xCAT on the Primary Management Node
chdef -t site databaseloc=/dbdirectory
#. Install and configure database. Refer to the doc [**todo:** choosing_the_Database] to configure the database on the xCAT management node. For PostgreSQL, add primary and standby IP addresses access to database, use ``pgsqlsetup -i -a 9.114.47.103 -a 9.114.47.104`` to migrate an existing xCAT database from SQLite to PostgreSQL.
Verify xcat is running on correct database by running: ::
@ -135,7 +135,7 @@ Setup xCAT on the Primary Management Node
#. Stop the xcatd daemon and some related network services from starting on reboot: ::
service xcatd stop
chkconfig --level 345 xcatd off
service conserver stop
chkconfig --level 2345 conserver off
service dhcpd stop
@ -148,8 +148,8 @@ Setup xCAT on the Primary Management Node
#. (Optional) If DFM is being used for hardware control capabilities, install DFM package, setup xCAT to communicate directly to the System P server's service processor.::
xCAT-dfm RPM
ISNM-hdwr_svr RPM
#. If there is any node that is already managed by the Management Node, change the noderes table tftpserver & xcatmaster & nfsserver attributes to the Virtual ip
@ -160,7 +160,7 @@ Setup xCAT on the Primary Management Node
Setup xCAT on the Standby Management Node
=========================================
#. Make sure the standby management node is NOT using the shared data.
#. Add the alias ip address ``9.114.47.97`` into the ``/etc/resolv.conf`` as the nameserver. Change the hostname resolution order to use ``/etc/hosts`` before the name server by setting "hosts: files dns" in /etc/nsswitch.conf.
@ -174,8 +174,8 @@ Setup xCAT on the Standby Management Node
#. (Optional) DFM only, Install DFM package: ::
xCAT-dfm RPM
ISNM-hdwr_svr RPM
#. Setup hostname resolution between the primary management node and standby management node. Make sure the primary management node can resolve the hostname of the standby management node, and vice versa.
@ -190,7 +190,7 @@ Setup xCAT on the Standby Management Node
#. Stop the xcatd daemon and related network services from starting on reboot: ::
service xcatd stop
chkconfig --level 345 xcatd off
service conserver stop
chkconfig --level 2345 conserver off
service dhcpd stop
@ -458,7 +458,7 @@ The operating system is installed on the internal disks.
#. Connect the shared disk to both management nodes
To verify the shared disks are connected correctly, run the sginfo command on both management nodes and look for the same serial number in the output. Be aware that the sginfo command may not be installed by default on Linux; it is shipped with the package sg3_utils, which you can manually install on both management nodes.
Once the sginfo command is installed, run sginfo -l command on both management nodes to list all the known SCSI disks, for example, enter: ::
@ -473,9 +473,9 @@ The operating system is installed on the internal disks.
/dev/sg3 [=/dev/sdd scsi0 ch=0 id=4 lun=0]
/dev/sg4 [=/dev/sde scsi0 ch=0 id=5 lun=0]
Use the ``sginfo -s <device_name>`` to identify disks with the same serial number on both management nodes, for example:
On the primary management node: ::
[root@rhmn1 ~]# sginfo -s /dev/sdb
Serial Number '1T23043224 '
@ -487,7 +487,7 @@ The operating system is installed on the internal disks.
[root@rhmn2~]# sginfo -s /dev/sdb
Serial Number '1T23043224 '
We can see that ``/dev/sdb`` is a shared disk on both management nodes. In some cases, as with mirrored disks and when there is no matching of serial numbers between the two management nodes, multiple disks on a single server can have the same serial number. In these cases, format the disks, mount them on both management nodes, and then touch files on the disks to determine if they are shared between the management nodes.
#. Create partitions on shared disks
@ -495,7 +495,7 @@ The operating system is installed on the internal disks.
fdisk /dev/sdc
Verify the partitions are created by running ``fdisk -l``.
#. Create file systems on shared disks
@ -507,9 +507,9 @@ The operating system is installed on the internal disks.
mkfs.ext3 -v /dev/sdc4
mkfs.ext3 -v /dev/sdc5
If you place entries for the disk in ``/etc/fstab``, which is not required, ensure that the entries do not have the system automatically mount the disk.
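A sketch of such an entry using the ``noauto`` option (device and mount point are taken from the examples above): ::

  /dev/sdc5  /tftpboot  ext3  noauto  0 0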
.. note:: Since the file systems will not be mounted automatically during system reboot this must be manually done and xCAT should be started **after** the filesystem is mounted.
#. Verify the file systems on the primary management node.
@ -525,7 +525,7 @@ The operating system is installed on the internal disks.
umount /etc/xcat
umount /install
umount ~/.xcat
umount /<dbdirectory>
umount /tftpboot
@ -539,9 +539,9 @@ The operating system is installed on the internal disks.
mount /dev/sdc4 /<dbdirectory>
mount /dev/sdc5 /tftpboot
You may get errors "mount: you must specify the filesystem type" or "mount: special device /dev/sdb1 does not exist" when trying to mount the file systems on the standby management node, this is caused by the missing devices files on the standby management node, run ``fidsk /dev/sdx`` and simply select "w write table to disk and exit" in the fdisk menu, then retry the mount.
You may get errors "mount: you must specify the filesystem type" or "mount: special device /dev/sdb1 does not exist" when trying to mount the file systems on the standby management node, this is caused by the missing devices files on the standby management node, run ``fidsk /dev/sdx`` and simply select "w write table to disk and exit" in the fdisk menu, then retry the mount.
After that, umount the file system on the standby management node: ::
umount /etc/xcat
umount /install
@ -1,6 +1,6 @@
.. _setup_xcat_high_available_management_node_with_nfs:
Setup xCAT HA Mgmt with NFS pacemaker and corosync
====================================================================================
In this doc, we will configure a xCAT HA cluster using ``pacemaker`` and ``corosync`` based on an NFS server. ``pacemaker`` and ``corosync`` only support ``x86_64`` systems; for more information about ``pacemaker`` and ``corosync``, refer to doc :ref:`setup_ha_mgmt_node_with_drbd_pacemaker_corosync`.
@ -24,19 +24,19 @@ The DB is SQLlite. There is no service node in this example.
Prepare NFS server
--------------------
In the NFS server 10.2.2.44, execute the following commands to export the file systems. If you want to use another non-root user to manage xCAT, such as hpcpeadmin, you should also create a directory for ``/home/hpcpeadmin``. Execute these commands on the NFS server c902f02x44. ::
# service nfs start
# mkdir ~/.xcat
# mkdir -p /etc/xcat
# mkdir -p /disk1/install/
# mkdir -p /disk1/hpcpeadmin
# mkdir -p /disk1/install/xcat
# vi /etc/exports
/disk1/install *(rw,no_root_squash,sync,no_subtree_check)
/etc/xcat *(rw,no_root_squash,sync,no_subtree_check)
/root/.xcat *(rw,no_root_squash,sync,no_subtree_check)
/root/.ssh *(rw,no_root_squash,sync,no_subtree_check)
/disk1/hpcpeadmin *(rw,no_root_squash,sync,no_subtree_check)
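After editing ``/etc/exports``, the new exports are typically activated with: ::

  # exportfs -ra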
@ -75,12 +75,12 @@ Execute steps on xCAT MN rhmn1
Mount share nfs from 10.2.2.44: ::
# mkdir -p /install
# mkdir -p /etc/xcat
# mkdir -p /home/hpcpeadmin
# mount 10.2.2.44:/disk1/install /install
# mount 10.2.2.44:/etc/xcat /etc/xcat
# mkdir -p /root/.xcat
# mount 10.2.2.44:/root/.xcat /root/.xcat
# mount 10.2.2.44:/root/.ssh /root/.ssh
# mount 10.2.2.44:/disk1/hpcpeadmin /home/hpcpeadmin
@ -113,16 +113,16 @@ Execute steps on xCAT MN rhmn1
Download xcat-core tar ball and xcat-dep tar ball from github, and untar them: ::
# mkdir /install/xcat
# mv xcat-core-2.8.4.tar.bz2 /install/xcat/
# mv xcat-dep-201404250449.tar.bz2 /install/xcat/
# cd /install/xcat
# tar -jxvf xcat-core-2.8.4.tar.bz2
# tar -jxvf xcat-dep-201404250449.tar.bz2
# cd xcat-core
# ./mklocalrepo.sh
# cd ../xcat-dep/rh6/x86_64/
# ./mklocalrepo.sh
# yum clean metadata
# yum install xCAT
# source /etc/profile.d/xcat.sh
@ -232,7 +232,7 @@ Install corosync and pacemaker on both rhmn2 and rhmn1
enabled=1
gpgcheck=0
#. Install ``corosync`` and ``pacemaker``, then generate ssh key:
Install ``corosync`` and ``pacemaker``: ::
@ -333,15 +333,15 @@ Be aware that you need to apply ALL the configuration at once. You cannot pick a
Check that both rhmn1 and rhmn2 are in standby state now: ::
rhmn1 ~]# crm status
Last updated: Wed Aug 13 22:57:58 2014
Last change: Wed Aug 13 22:40:31 2014 via cibadmin on rhmn1
Stack: classic openais (with plugin)
Current DC: rhmn2 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
14 Resources configured.
Node rhmn1: standby
Node rhmn2: standby
Execute ``crm configure edit`` to add all the configuration at once: ::
@ -481,29 +481,29 @@ Verify auto fail over
#. Let rhmn1 standby and rhmn2 online, xcat will run on rhmn2: ::
rhmn2 /]# crm node online rhmn2
rhmn2 /]# crm node standby rhmn1
rhmn2 /]# crm status
Last updated: Mon Aug 4 23:19:33 2014
Last change: Mon Aug 4 23:19:40 2014 via crm_attribute on rhmn2
Stack: classic openais (with plugin)
Current DC: rhmn1 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
12 Resources configured.
Node rhmn1: standby
Online: [ rhmn2 ]
Resource Group: XCAT_GROUP
xCATmnVIP (ocf::heartbeat:IPaddr2): Started rhmn2
INSTALLFS (ocf::heartbeat:Filesystem): Started rhmn2
ETCXCATFS (ocf::heartbeat:Filesystem): Started rhmn2
ROOTXCATFS (ocf::heartbeat:Filesystem): Started rhmn2
NFSlock_xCAT (lsb:nfslock): Started rhmn2
xCAT (lsb:xcatd): Started rhmn2
Clone Set: clone_named [named]
Started: [ rhmn2 ]
Stopped: [ named:1 ]
rhmn2 /]#lsxcatd -v
Version 2.8.4 (git commit 7306ca8abf1c6d8c68d3fc3addc901c1bcb6b7b3, built Mon Apr 21 20:48:59 EDT 2014)
@ -95,7 +95,7 @@ procedure to move its CNs over to the backup SN.
Move the nodes to the new service nodes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Use the :doc:`snmove </guides/admin-guides/references/man1/snmove.1>` command to make the database changes necessary to move a set of compute nodes from one Service Node to another.
To switch all the compute nodes from Service Node ``sn1`` to the backup Service Node ``sn2``, run: ::
@ -1,10 +1,10 @@
Appendix C: Migrating a Management Node to a Service Node
=========================================================
Directly converting an existing Management Node to a Service Node may have some issues and is not recommended. Do the following steps to convert the xCAT Management Node into a Service Node:
#. backup your xCAT database on the Management Node
#. Install a new xCAT Management node
#. Restore your xCAT database into the new Management Node
#. Re-provision the old xCAT Management Node as a new Service Node
@ -7,7 +7,7 @@ Add the defined nodes into the DHCP configuration, refer to:
`XCAT_pLinux_Clusters/#configure-dhcp <http://localhost/fake_todo>`_
In a large cluster, the size of the dhcp lease file "/var/lib/dhcpd/dhcpd.leases" on the DHCP server will grow over time. At around 100MB in size, the DHCP server will take a long time to respond to DHCP requests from clients and cause DHCP timeouts: ::
...
Mar 2 01:59:10 c656ems2 dhcpd: DHCPDISCOVER from 00:0a:f7:73:7d:d0 via eth0
Mar 2 01:59:10 c656ems2 dhcpd: DHCPOFFER on 9.114.39.101 to 00:0a:f7:73:7d:d0 via eth0
@ -21,7 +21,7 @@ where the /tmp/servicenodes contains a host per line: ::
10.%.%.%
node2.cluster.net
**While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation:
`Manually set up MySQL <https://sourceforge.net/p/xcat/wiki/Setting_Up_MySQL_as_the_xCAT_DB/#configure-mysql-manually>`_
.. _grante_revoke_mysql_access_label:
@ -36,7 +36,7 @@ Granting/Revoking access to the database for Service Node Clients
* Granting access to the xCAT database. Service Nodes are required for xCAT hierarchical support. Compute nodes may also need access that depends on which application is going to run. (xcat201 is xcatadmin's password for following examples) ::
MariaDB > GRANT ALL on xcatdb.* TO xcatadmin@<servicenode(s)> IDENTIFIED BY 'xcat201';
Use the wildcards to do a GRANT ALL to every IP address or nodename that needs to access the database. ::
MariaDB > GRANT ALL on xcatdb.* TO xcatadmin@'%.cluster.net' IDENTIFIED BY 'xcat201';
@ -50,4 +50,4 @@ Granting/Revoking access to the database for Service Node Clients
MariaDB > SELECT host, user FROM mysql.user;
@ -15,7 +15,7 @@ The MySQL database is supported by xCAT since xCAT 2.1. MariaDB is a fork of th
| xCAT 2.10+ | Yes | Yes |
+------------+------------+------------+
MySQL/MariaDB packages are shipped as part of most Linux Distributions.
Red Hat Enterprise Linux
@ -55,12 +55,12 @@ Suse Linux Enterprise Server
libqt4-sql-mysql-*
libmysqlclient18-*
perl-DBD-mysql-*
Debian/Ubuntu
-------------
* MySQL - Using ``apt-get``, ensure that the following packages are installed on the management node: ::
mysql-server
mysql-common
@ -12,18 +12,18 @@ If you no longer want to use MySQL/MariaDB to maintain ``xcatdb``, and like to s
XCATBYPASS=1 restorexCATdb -p ~/xcat-dbback
* Change to PostgreSQL, following documentation: :doc:`/advanced/hierarchy/databases/postgres_install`
* Change back to default xCAT database, SQLite (**Note**: xCAT Hierarchy cluster will no longer work)
#. Stop the ``xcatd`` daemon on the management node. ::
service xcatd stop
#. Remove the ``xcatdb`` from MySQL/MariaDB (optional): ::
/usr/bin/mysql -u root -p
drop the xcatdb: ::
@ -34,10 +34,10 @@ If you no longer want to use MySQL/MariaDB to maintain ``xcatdb``, and like to s
mysql> drop user xcatadm;
#. Move, or remove, the ``/etc/xcat/cfgloc`` file as it points xCAT to MySQL/MariaDB. (without this file, xCAT defaults to SQLite): ::
rm /etc/xcat/cfgloc
#. Restart ``xcatd``: ::
service xcatd start
@ -20,17 +20,17 @@ Start/Stop MySQL/MariaDB service
service mysql stop
Basic MySQL/MariaDB commands
-----------------------------
Refer to `<https://www.mariadb.org/>`_ for the latest documentation.
* Using ``mysql``, connect to the xcat database: ::
mysql -u root -p
* List the hosts and users which are managed by this xCAT MN: ::
MariaDB> SELECT host, user FROM mysql.user;
* List the databases: ::
@ -8,13 +8,13 @@ A utility is provided to migrate an existing xCAT database from SQLite to Postgr
pgsqlsetup -i -V
**While not recommended**, if you wish to manually migrate your xCAT database, see the following documentation:
`Manually set up PostgreSQL <https://sourceforge.net/p/xcat/wiki/Setting_Up_PostgreSQL_as_the_xCAT_DB/#manually-setup-postgresql>`_
Setting up the Service Nodes
----------------------------
For service nodes, add the IP address of each service node to the postgres configuration file: ``/var/lib/pgsql/data/pg_hba.conf``
If you had the following two service nodes: ::
@ -29,10 +29,10 @@ You would add the following to ``/var/lib/pgsql/data/pg_hba.conf`` ::
Restart PostgreSQL after editing the file: ::
service postgresql restart
For more information about changing the ``pg_hba.conf`` file and ``postgresql.conf`` files, see the following documentation:
`Setup the PostgreSQL Configuration Files <https://sourceforge.net/p/xcat/wiki/Setting_Up_PostgreSQL_as_the_xCAT_DB/#setup-the-postgresql-configuration-files>`_
.. _modify_postgresql_database_diretory:
@ -16,7 +16,7 @@ Using yum, install the following rpms: ::
Suse Linux Enterprise Server
----------------------------
**Note:** On SLES, ``perl-DBD`` packages are provided on the SDK iso images.
Using zypper, install the following rpms: ::
@ -24,7 +24,7 @@ Using zyppr, install the following rpms: ::
zypper install perl-DBD-Pg
Debian/Ubuntu
-------------
Using apt, install the following packages: ::
@ -8,12 +8,12 @@ To remove ``xcatdb`` completely from the PostgreSQL database:
mkdir -p ~/xcat-dbback
dumpxCATdb -p ~/xcat-dbback
#. Stop the ``xcatd`` daemon on the management node.
**Note:** If you are using *xCAT Hierarchy (service nodes)* and removing ``xcatdb`` from postgres, hierarchy will no longer work. You will need to configure another database which supports remote database access to continue using the hierarchy feature. ::
service xcatd stop
#. Remove the ``xcatdb`` from PostgreSQL: ::
su - postgres
@ -31,7 +31,7 @@ To remove ``xcatdb`` completely from the PostgreSQL database:
rm -rf *
#. Move, or remove, the ``/etc/xcat/cfgloc`` file as it points xCAT to PostgreSQL. (without this file, xCAT defaults to SQLite): ::
mv /etc/xcat/cfgloc /etc/xcat/cfglog.postgres
#. Restore the PostgreSQL database into SQLite: ::
@ -40,5 +40,5 @@ To remove ``xcatdb`` completely from the PostgreSQL database:
#. Restart ``xcatd``: ::
service xcatd start
@ -9,13 +9,13 @@ Using PostgreSQL
Use the psql command line utility to connect to the PostgreSQL database: ::
su - postgres
psql -h <hostname> -U xcatadm -d xcatdb
Useful Commands
---------------
* Show create statement for a table, for example prescripts table. ::
/usr/bin/pg_dump xcatdb -U xcatadm -t prescripts
@ -26,7 +26,7 @@ Useful Commands
# drop the xcatdb
dropdb xcatdb
# remove the xcatadm database owner
dropuser xcatadm
# clean up the postgresql files (necessary if you want to re-create the database)
@ -38,7 +38,7 @@ Useful Commands
su - postgres
psql -l
* Access the database: ::
su - postgres
psql xcatdb
@ -12,8 +12,8 @@ Using ``psql``, connect to the xcat database: ::
list the xCAT tables: ::
xcatdb=> \dt
show the entries in the nodelist table: ::
xcatdb=> select * from nodelist;
@ -29,7 +29,7 @@ Show the SQL create statement for a table: ::
/usr/bin/pg_dump_xcatdb -U xcatadm -t <table_name>
# example, for prescripts table:
/usr/bin/pg_dump xcatdb -U xcatadm -t prescripts
List all databases in postgres: ::
@ -4,16 +4,16 @@ Define and install your Compute Nodes
Make /install available on the Service Nodes
--------------------------------------------
Note that all of the files and directories pointed to by your osimages should
be placed under the directory referred to in site.installdir (usually
/install), so they will be available to the service nodes. The installdir
directory is mounted or copied to the service nodes during the hierarchical
installation.
If you are not using the NFS-based statelite method of booting your compute
nodes and you are not using service node pools, set the installloc attribute
to "/install". This instructs the service node to mount /install from the
management node. (If you don't do this, you have to manually sync /install
between the management node and the service nodes.)
::
@ -23,17 +23,17 @@ between the management node and the service nodes.)
Make compute node syncfiles available on the servicenodes
---------------------------------------------------------
If you are not using the NFS-based statelite method of booting your compute
nodes, and you plan to use the syncfiles postscript to update files on the
nodes during install, you must ensure that those files are sync'd to the
servicenodes before the install of the compute nodes. To do this after your
nodes are defined, you will need to run the following whenever the files in
your synclist change on the Management Node:
::
updatenode <computenoderange> -f
At this point you can return to the documentation for your cluster environment
to define and deploy your compute nodes.
@ -16,8 +16,8 @@ group called **service**. Some of the commands in this document will use the
group **service** to update all service nodes.
Note: a Service Node's service node is the Management Node; so a service node
must have a direct connection to the management node. The compute nodes do not
have to be directly attached to the Management Node, only to their service
node. This will all have to be defined in your networks table.
Add Service Nodes to the nodelist Table
@ -25,11 +25,11 @@ Add Service Nodes to the nodelist Table
Define your service nodes (if not defined already), and by convention we put
them in a **service** group. We usually have a group compute for our compute
nodes, to distinguish between the two types of nodes. (If you want to use your
own group name for service nodes, rather than service, you need to change some
defaults in the xCAT db that use the group name service. For example, in the
postscripts table there is by default a group entry for service, with the
appropriate postscripts to run when installing a service node. Also, the
default ``kickstart/autoyast`` template, pkglist, etc that will be used have
file names based on the profile name service.) ::
@ -44,7 +44,7 @@ appropriate for SNs. Display the list of osimages and choose one with
lsdef -t osimage
For this example, let's assume you chose the stateful osimage definition for
rhels 7: rhels7-x86_64-install-service . If you want to modify any of the
osimage attributes (e.g. ``kickstart/autoyast`` template, pkglist, etc),
make a copy of the osimage definition and also copy to ``/install/custom``
@ -63,16 +63,16 @@ Now set some of the common attributes for the SNs at the group level: ::
Add Service Nodes to the servicenode Table
------------------------------------------
An entry must be created in the servicenode table for each service node or the
service group. This table describes all the services you would like xcat to
setup on the service nodes. (Even if you don't want xCAT to set up any
services - unlikely - you must define the service nodes in the servicenode
table with at least one attribute set (you can set it to 0), otherwise it will
not be recognized as a service node.)
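As a minimal sketch (assuming the conventional ``service`` group), setting even a single servicenode attribute is enough for the nodes to be recognized as service nodes: ::
    chdef -t group -o service setupnfs=0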
When the xcatd daemon is started or restarted on the service node, it will
make sure all of the requested services are configured and started. (To
temporarily avoid this when restarting xcatd, use "service xcatd reload"
instead.)
To set up the minimum recommended services on the service nodes: ::
@ -138,9 +138,9 @@ value as the node's servicenode attribute.
Host name resolution must have been setup in advance, with ``/etc/hosts``, DNS
or dhcp to ensure that the names put in this table can be resolved on the
Management Node, Service nodes, and the compute nodes. It is easiest to have a
node group of the compute nodes for each service node. For example, if all the
nodes in node group compute1 are serviced by sn1 and all the nodes in node
group compute2 are serviced by sn2:
::
@ -148,18 +148,18 @@ group compute2 are serviced by sn2:
chdef -t group compute1 servicenode=sn1 xcatmaster=sn1-c
chdef -t group compute2 servicenode=sn2 xcatmaster=sn2-c
Note: in this example, sn1 and sn2 are the node names of the service nodes
(and therefore the hostnames associated with the NICs that the MN talks to).
The hostnames sn1-c and sn2-c are associated with the SN NICs that communicate
with their compute nodes.
Note: if not set, the attribute tftpserver's default value is xcatmaster,
but in some releases of xCAT it has not defaulted correctly, so it is safer
to set the tftpserver to the value of xcatmaster.
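For example, a sketch reusing the groups and hostnames from the commands above: ::
    chdef -t group compute1 tftpserver=sn1-c
    chdef -t group compute2 tftpserver=sn2-c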
These attributes will allow you to specify which service node should run the
conserver (console) and monserver (monitoring) daemon for the nodes in the
group specified in the command. In this example, we are having each node's
primary SN also act as its conserver and monserver (the most typical setup).
::
@ -169,43 +169,43 @@ primary SN also act as its conserver and monserver (the most typical setup).
Service Node Pools
^^^^^^^^^^^^^^^^^^
Service Node Pools are multiple service nodes that service the same set of
compute nodes. Having multiple service nodes allows backup service node(s) for
a compute node when the primary service node is unavailable, or can be used
for work-load balancing on the service nodes. But note that the selection of
which SN will service which compute node is made at compute node boot time.
After that, the selection of the SN for this compute node is fixed until the
compute node is rebooted or the compute node is explicitly moved to another SN
using the `snmove <http://localhost/fake_todo>`_ command.
To use Service Node pools, you need to architect your network such that all of
the compute nodes and service nodes in a particular pool are on the same flat
network. If you don't want the management node to respond to manage some of
the compute nodes, it shouldn't be on that same flat network. The
``site.dhcpinterfaces`` attribute should be set such that the SNs' DHCP daemon
only listens on the NIC that faces the compute nodes, not the NIC that faces
the MN. This avoids some timing issues when the SNs are being deployed (so
that they don't respond to each other before they are completely ready). You
also need to make sure the `networks <http://localhost/fake_todo>`_ table
accurately reflects the physical network structure.
To define a list of service nodes that support a set of compute nodes, set the
servicenode attribute to a comma-delimited list of the service nodes. When
running an xCAT command like xdsh or updatenode for compute nodes, the list
will be processed left to right, picking the first service node on the list to
run the command. If that service node is not available, then the next service
node on the list will be chosen until the command is successful. Errors will
be logged. If no service node on the list can process the command, then the
error will be returned. You can provide some load-balancing by assigning your
service nodes as we do below.
When using service node pools, the intent is to have the service node that
responds first to the compute node's DHCP request during boot also be the
xcatmaster, the tftpserver, and the NFS/http server for that node. Therefore,
the xcatmaster and nfsserver attributes for nodes should not be set. When
nodeset is run for the compute nodes, the service node interface on the
network to the compute nodes should be defined and active, so that nodeset
will default those attribute values to the "node ip facing" interface on that
service node.
For example: ::
@ -213,7 +213,7 @@ For example: ::
chdef -t node compute1 servicenode=sn1,sn2 xcatmaster="" nfsserver=""
chdef -t node compute2 servicenode=sn2,sn1 xcatmaster="" nfsserver=""
You need to set the sharedtftp site attribute to 0 so that the SNs will not
automatically mount the ``/tftpboot`` directory from the management node:
::
@ -233,8 +233,8 @@ from this rsync.
rsync -auv --exclude 'autoinst' /install sn1:/
Note: If your service nodes are stateless and site.sharedtftp=0, if you reboot
any service node when using servicenode pools, any data written to the local
``/tftpboot`` directory of that SN is lost. You will need to run nodeset for
all of the compute nodes serviced by that SN again.
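For example, if ``sn1`` serves the node group ``compute1`` (names taken from the examples above), a sketch of the rerun is: ::
    nodeset compute1 osimage=<osimage>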
@ -244,14 +244,14 @@ networks table, see ref: networks table, see :ref:`setup_networks_table_label`.
Conserver and Monserver and Pools
"""""""""""""""""""""""""""""""""
Conserver and monserver are not yet supported with Service Node Pools. You must
explicitly assign these functions to a service node using the nodehm.conserver
and noderes.monserver attributes as above.
Setup Site Table
----------------
If you are not using the NFS-based statelite method of booting your compute
nodes, set the installloc attribute to ``/install``. This instructs the
service node to mount ``/install`` from the management node. (If you don't do
this, you have to manually sync ``/install`` between the management node and
@ -259,11 +259,11 @@ the service nodes.) ::
chdef -t site clustersite installloc="/install"
For IPMI controlled nodes, if you want the out-of-band IPMI operations to be
done directly from the management node (instead of being sent to the
appropriate service node), set site.ipmidispatch=n.
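For example, a sketch using the ``clustersite`` object shown elsewhere in this document: ::
    chdef -t site clustersite ipmidispatch=n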
If you want to throttle the rate at which nodes are booted up, you can set the
following site attributes:
@ -278,7 +278,7 @@ See the `site table man page <http://localhost/fack_todo>`_ for details.
Setup networks Table
--------------------
All networks in the cluster must be defined in the networks table. When xCAT
is installed, it runs makenetworks, which creates an entry in the networks
table for each of the networks the management node is on. You need to add
entries for each network the service nodes use to communicate to the compute
@ -288,22 +288,22 @@ For example: ::
mkdef -t network net1 net=10.5.1.0 mask=255.255.255.224 gateway=10.5.1.1
If you want to set the nodes' xcatmaster as the default gateway for the nodes,
the gateway attribute can be set to keyword "<xcatmaster>". In this case, xCAT
code will automatically substitute the IP address of the node's xcatmaster for
the keyword. Here is an example:
::
mkdef -t network net1 net=10.5.1.0 mask=255.255.255.224 gateway=<xcatmaster>
The ipforward attribute should be enabled on all the xcatmaster nodes that
will be acting as default gateways. You can set ipforward to 1 in the
servicenode table or add the line "net.ipv4.ip_forward = 1" in file
``/etc/sysctl.conf`` and then run "sysctl -p /etc/sysctl.conf" manually to
enable the ipforwarding.
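A minimal sketch of the manual method just described, run on each gateway service node: ::
    # make IPv4 forwarding persistent, then apply it immediately
    echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
    sysctl -p /etc/sysctl.conf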
Note: If using service node pools, the networks table dhcpserver attribute can
be set to any single service node in your pool. The networks table tftpserver and
nameserver attributes should be left blank.
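For example, a sketch that points the ``net1`` network defined above at the pool member ``sn1``: ::
    chdef -t network net1 dhcpserver=sn1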
Verify the Tables
@ -317,17 +317,17 @@ compute1, compute2: ::
Add additional adapters configuration script (optional)
------------------------------------------------------------
It is possible to have additional adapter interfaces automatically configured
when the nodes are booted. xCAT provides sample configuration scripts for
ethernet, IB, and HFI adapters. These scripts can be used as-is or they can be
modified to suit your particular environment. The ethernet sample is
``/install/postscripts/configeth``. When you have the configuration script that
you want, you can add it to the "postscripts" attribute as mentioned above. Make
sure your script is in the ``/install/postscripts`` directory and that it is
executable.
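For example, a sketch that adds the sample ethernet script to a ``compute`` group (the group name is an assumption): ::
    chdef -t group -o compute -p postscripts=configeth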
Note: For system p servers, if you plan to have your service node perform the
hardware control functions for its compute nodes, it is necessary that the SN
ethernet network adapters connected to the HW service VLAN be configured.
Configuring Secondary Adapters
View File
@ -3,7 +3,7 @@ Define Service Nodes
This next part shows how to configure an xCAT Hierarchy and provision xCAT service nodes from an existing xCAT cluster.
*The document assumes that the compute nodes that are part of your cluster have already been defined into the xCAT database and you have successfully provisioned the compute nodes using xCAT*
The following table illustrates the cluster being used in this example:
@ -33,8 +33,8 @@ The following table illustrates the cluster being used in this example:
chdef -t site hierarchicalattrs="postscripts"
#. Select the compute nodes that will become service nodes
The first node in each rack, ``r1n01`` and ``r2n01``, is selected to become the xCAT service node that manages the compute nodes in its rack
@ -53,7 +53,7 @@ The following table illustrates the cluster being used in this example:
chdef -t group -o service setupnfs=1 \
setupdhcp=1 \
setuptftp=1 \
setupnameserver=1 \
setupconserver=2
@ -63,26 +63,26 @@ The following table illustrates the cluster being used in this example:
* For clusters with subnetted management networks, you might want to set ``setupipforward=1``
* For the ``setupconserver`` attribute, if ``conserver`` is used, set to ``1``, if ``goconserver`` is used, set to ``2``
#. Add additional postscripts for Service Nodes (optional)
By default, xCAT will execute the ``servicenode`` postscript when installed or diskless booted. This postscript sets up the necessary credentials and installs the xCAT software on the Service Nodes. If you have additional postscripts that you want to execute on the service nodes, copy them to ``/install/postscripts`` and run the following: ::
chdef -t group -o service -p postscripts=<mypostscript>
#. Assigning Compute Nodes to their Service Nodes
The node attributes ``servicenode`` and ``xcatmaster``, define which Service node will serve the particular compute node.
* ``servicenode`` - defines which Service Node the **Management Node** should send commands to (e.g. ``xdsh``) and should be set to the hostname or IP address by which the management node can contact the service node.
* ``xcatmaster`` - defines which Service Node the **Compute Node** should boot from and should be set to the hostname or IP address by which the compute node can contact the service node.
You must set both ``servicenode`` and ``xcatmaster`` regardless of whether or not you are using service node pools; for most scenarios, the value will be identical. ::
chdef -t group -o rack1 servicenode=r1n01 xcatmaster=r1n01
chdef -t group -o rack2 servicenode=r2n01 xcatmaster=r2n01
#. Set the conserver and monserver attributes
Set which service node should run the conserver (console) and monserver (monitoring) daemon for the nodes in the group. The most typical setup is to have the service node also act as its conserver and monserver. ::
chdef -t group -o rack1 conserver=r1n01 monserver=r1n01
@ -101,7 +101,7 @@ The following table illustrates the cluster being used in this example:
chdef -t site clustersite sharedtftp=0
chdef -t site clustersite installloc=
rsync -auv --exclude 'autoinst' /install r1n01:/
rsync -auv --exclude 'autoinst' /install r2n01:/
rsync -auv --exclude 'autoinst' /tftpboot r1n01:/
rsync -auv --exclude 'autoinst' /tftpboot r2n01:/
View File
@ -1,7 +1,7 @@
Hierarchical Clusters / Large Cluster Support
=============================================
xCAT supports management of very large clusters by creating a **Hierarchical Cluster** built on the concept of **xCAT Service Nodes**.
When dealing with large clusters, to balance the load, it is recommended to have more than just the Management Node ("MN") handling the installation and management of the Compute Nodes ("CN"). These additional *helper* nodes are referred to as **Service Nodes** ("SN"). The Management Node can delegate all management operational needs to the Service Node responsible for a set of compute nodes.
View File
@ -11,7 +11,7 @@ Any cluster using statelite compute nodes must use a stateful (diskful) Service
Configure ``otherpkgdir`` and ``otherpkglist`` for service node osimage
-----------------------------------------------------------------------
* Create a subdirectory ``xcat`` under a path specified by ``otherpkgdir`` attribute of the service node os image, selected during the :doc:`../define_service_nodes` step.
For example, for osimage *rhels7-x86_64-install-service* ::
@ -114,17 +114,17 @@ Watch the installation progress using either wcons or rcons: ::
Update Service Node Diskful Image
---------------------------------
To update the xCAT software on the Service Node:
#. Remove previous xcat-core, xcat-dep, and tar files in the NFS mounted ``/install/post/otherpkgs/`` directory: ::
rm /install/post/otherpkgs/<os>/<arch>/xcat/xcat-core
rm /install/post/otherpkgs/<os>/<arch>/xcat/xcat-dep
rm /install/post/otherpkgs/<os>/<arch>/xcat/<xcat-core.tar>
rm /install/post/otherpkgs/<os>/<arch>/xcat/<xcat-dep.tar>
#. Download the desired tar files from xcat.org on to the Management Node, and untar them in the same NFS mounted ``/install/post/otherpkgs/`` directory: ::
cd /install/post/otherpkgs/<os>/<arch>/xcat/
tar jxvf <new-xcat-core.tar>
tar jxvf <new-xcat-dep.tar>
View File
@ -67,7 +67,7 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage
If you would like to change any of these files, copy them to a custom
directory. This can be any directory you choose, but we recommend that you
keep it somewhere under ``/install``. A good location is something like ``/install/custom/netboot/<osimage>``.
::
@ -172,8 +172,8 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage
If you installed your management node directly from the online
repository, you will need to download the ``xcat-core`` and ``xcat-dep`` tarballs
- From http://xcat.org/download.html, download the ``xcat-core`` and ``xcat-dep`` tarball files.
Copy these into a subdirectory in the ``otherpkgdir`` directory.
::
@ -184,9 +184,9 @@ When you run ``copycds``, xCAT will only create a Service Node stateful osimage
cd /install/post/otherpkgs/rhels7.3/ppc64le
mkdir xcat
cd xcat
# copy the <xcat-core> and <xcat-dep> tarballs here
# extract the tarballs
tar -jxvf <xcat-core>.tar.bz2
tar -jxvf <xcat-dep>.tar.bz2
View File
@ -1,7 +1,7 @@
Service Nodes
=============
Service Nodes are similar to the xCAT Management Node in that each Service Node runs an instance of the xCAT daemon: ``xcatd``. The ``xcatd`` daemons communicate with each other using the same XML/SSL protocol that the xCAT client uses to communicate with ``xcatd`` on the Management Node.
The Service Nodes need to communicate with the xCAT database running on the Management Node. This is done using the remote client capabilities of the database. This is why the default SQLite database cannot be used.
View File
@ -6,15 +6,15 @@ Use the :doc:`buildkit </guides/admin-guides/references/man1/buildkit.1>` comman
buildkit create <kitbasename> [-l|--kitloc <kit location>]
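For example, a sketch with hypothetical names (``mykit`` and the kit location are placeholders, not part of the original example): ::
    buildkit create mykit -l /build/kits/mykit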
Kit Directory
-------------
The Kit directory location will be automatically populated with additional subdirectories and samples:
**buildkit.conf** - The sample Kit build configuration file.
**source_packages** - This directory stores the source packages for Kit Packages and Non-Native Packages. The **buildkit** command will search these directories for source packages when building packages. This directory stores:
* RPM spec and tarballs. (A sample spec file is provided.)
* Source RPMs.
* Pre-built RPMs (contained in a subdirectory of source_packages)
@ -42,7 +42,7 @@ The Kit directory location will be automatically populated with additional subd
Kit Configuration File
----------------------
The ``buildkit.conf`` file is a sample file that contains a description of all the supported attributes and indicates required or optional fields. The user needs to modify this file for the software kit to be built. [#]_
**kit** --- This stanza defines general information for the Kit. There must be exactly one kit stanza in a kit build file. ::
@ -71,12 +71,12 @@ The ``buildkit.conf`` file is a sample file that contains a description of all t
osarch=x86_64
The minor version can be specified in the following formats: ::
osminorversion=2 <<-- minor version has to be exactly matched to 2
osminorversion=>=2 <<-- minor version can be 2 or greater than 2
osminorversion=<=2 <<-- minor version can be 2 or less than 2
osminorversion=>2 <<-- minor version has to be greater than 2
osminorversion=<2 <<-- minor version has to be less than 2
**kitcomponent** --- This stanza defines one Kit Component. A kitcomponent definition is a way of specifying a subset of the product Kit that may be installed into an xCAT osimage. A kitcomponent may or may not be dependent on other kitcomponents. If you want to build a component which supports multiple OSes, you need to create one kitcomponent stanza for each OS. ::
@ -106,13 +106,13 @@ minor version can be support following format: ::
serverroles=compute
ospkgdeps=at,rsh-server,xinetd,sudo,libibverbs-32bit,libibverbs,insserv
kitrepoid=sles11_x86_64
kitpkgdeps=ppe_rte_license
**kitpackage** --- This stanza defines a Kit Package (i.e. an RPM). There can be zero or more kitpackage stanzas. To support multiple packages, you need to either:
#. Define one kitpackage section per supported OS, or
#. Define one kitpackage stanza which contains multiple kitrepoid lines. For RPM packages, users are responsible for creating an RPM spec file that can run on multiple OSes.
::
@ -149,7 +149,7 @@ minor version can be support following format: ::
Partial vs. Complete Kits
-------------------------
A **complete** software kit includes all the product software and is ready to be consumed as is. A **partial** software kit is one that does not include all the product packages and requires the consumer to download the product software and complete the kit before it can be consumed.
To build partial kits, the ``isexternalpkg=yes`` needs to be set in the ``kitpackage`` stanza in the ``buildkit.conf`` file: ::
View File
@ -25,4 +25,4 @@ If the Kit Package Repository is not fully built, the command builds it as follo
#. Build the Component Meta-Packages associated with this Kit Package Repository. Create the packages under the Kit Package Repository directory
#. Build the Kit Packages associated with this Kit Package Repository. Create the packages under the Kit Package Repository directory
#. Build the repository meta-data for the Kit Package Repository. The repository meta-data is based on the OS native package format. For example, for RHEL, we build the YUM repository meta-data with the createrepo command.
View File
@ -37,7 +37,7 @@ Follow these steps to complete the kit build process for a partial kit.
#. copy the partial kit to a working directory
#. copy the product software packages to a convenient location or locations
#. cd to the working directory
#. Build the complete kit tarfile
::
View File
@ -5,5 +5,5 @@ After modifying the ``buildkit.conf`` file and copying all the necessary files t
buildkit chkconfig
This command will verify all required fields defined in the buildkit.conf. If errors are found, fix the specified error and rerun the command until all fields are validated.
View File
@ -1,4 +1,4 @@
Introduction
============
Contents
@ -6,10 +6,10 @@ Contents
A Software Kit is a tar file that contains the following:
**Kit Configuration File** --- A file describing the contents of this kit and contains following information
* Kit name, version, description, supported OS distributions, license information, and deployment parameters
* Kit repository information including name, supported OS distributions, and supported architectures
* Kit component information including name, version, description, server roles, scripts, and other data
**Kit Repositories** --- A directory for each operating system version this kit is supported in. Each directory contains all of the product software packages required for that environment along with repository metadata.
@ -42,14 +42,14 @@ Once the kit components are added to xCAT osimage definitions, administrators ca
#. ``genimage`` command to create a diskless OS image installing the kit components for diskless OS provisioning
#. ``updatenode`` command to install the kit components on existing deployed nodes
The ``kitcomponent`` metadata defines the kit packages as dependency packages and the OS package manager (``yum``, ``zypper``, ``apt-get``) automatically installs the required packages during the xCAT ``otherpkgs`` install process.
Kit Framework
-------------
With time, the implementation of the xCAT Software Kit support may change.
In order to process a kit successfully, the kit must be compatible with the level of xCAT code that was used to build the kit. The xCAT kit commands and software kits contain the framework version and compatible supported versions.
To view the framework version, use the ``-v | --version`` option on :doc:`addkit </guides/admin-guides/references/man1/addkit.1>` ::
@ -59,7 +59,7 @@ To view the framework version, use the ``-v | --version`` option on :doc:`addkit
compatible_frameworks = 0,1,2
If the commands in the xCAT installation are not compatible with the Software Kit obtained, update xCAT to a more recent release.
.. [#] PCM is IBM Platform Cluster Manager
View File
@ -24,6 +24,6 @@ updating diskful nodes
For existing active nodes, use the updatenode command to update the OS on those nodes. The updatenode command will use the osimage assigned to the node to determine the software to be updated. Once the osimage has been updated, make sure the correct image is assigned to the node and then run updatenode: ::
chdef <nodelist> provmethod=<osimage>
updatenode <nodelist>
View File
@ -1,9 +1,9 @@
Quick Start Guide
=================
This quick start is provided to guide users through the steps required to install the IBM High Performance Computing (HPC) software stack on a cluster managed by xCAT. (*NOTE:* xCAT provides XLC and XLF partial kits, but all other HPC kits are provided by the HPC product teams; xCAT may not have any knowledge of their dependencies and requirements)
The following software kits will be used to install the IBM HPC software stack on to a RedHat Enterprise Linux 7.2 operating system running on ppc64le architecture.
* ``xlc-13.1.3-0-ppc64le.tar.bz2`` [1]_
* ``xlf-15.1.3-0-ppc64le.tar.bz2`` [1]_
@ -16,7 +16,7 @@ The following software kits will be used to install the IBM HPC software stack o
.. [1] This guide assumes that the **complete** software kit is available for all the products listed below. For the IBM XL compilers, follow the :doc:`IBM XL Compiler </advanced/kit/hpc/software/compilers>` documentation to obtain the software and create the **complete** kit before proceeding.
1. Using the ``addkit`` command, add each software kit package into xCAT: ::
addkit xlc-13.1.3-0-ppc64le.tar.bz2,xlf-15.1.3-0-ppc64le.tar.bz2
addkit pperte-2.3.0.0-1547a-ppc64le.tar.bz2,pperte-2.3.0.2-s002a-ppc64le.tar.bz2
addkit pessl-5.2.0-0-ppc64le.tar.bz2,essl-5.4.0-0-ppc64le.tar.bz2
@ -25,15 +25,15 @@ The following software kits will be used to install the IBM HPC software stack o
The ``lskit`` command can be used to view the kits after adding to xCAT.
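For example, a usage sketch that lists all kits currently added: ::
    lskit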
2. Using the ``addkitcomp`` command, add the kitcomponent to the target osimage.
The order that the kit components are added to the osimage is important due to dependencies that kits may have with one another, a feature to help catch potential issues ahead of time. There are a few different types of dependencies:
* **internal kit dependencies** - kit components within the software kit have dependencies. For example, the software has a dependency on its license component. The ``-a`` option will automatically resolve internal kit dependencies.
* **external kit dependencies** - a software kit depends on another software provided in a separate kit. The dependency kit must be added first. ``addkitcomp`` will complain if it cannot resolve the dependency.
* **runtime dependencies** - the software provided in the kit has rpm requirements for external 3rd party RPMs not shipped with the kit. The administrator needs to configure these before deploying the osimage, and ``addkitcomp`` cannot detect these dependencies.
In the following examples, the ``rhels7.2-ppc64le-install-compute`` osimage is used and the ``-a`` option is specified to resolve internal dependencies.
#. Add the **XLC** kitcomponents to the osimage: ::
@ -42,7 +42,7 @@ The following software kits will be used to install the IBM HPC software stack o
#. Add the **XLF** kitcomponents to the osimage: ::
addkitcomp -a -i rhels7.2-ppc64le-install-compute \
xlf.compiler-compute-15.1.3-0-rhels-7.2-ppc64le
@ -59,11 +59,11 @@ The following software kits will be used to install the IBM HPC software stack o
min-pperte-compute-2.3.0.0-1547a-rhels-7.2-ppc64le
#. Add the PE RTE PTF2, **pperte-s002a**, kitcomponents to the osimage.
The PTF2 update requires the ``pperte-license`` component, which is provided by the GA software kit. The ``addkitcomp -n`` option allows for multiple versions of the same kit component to be installed into the osimage. If only the PTF2 version is intended to be installed, you can skip the previous step for adding the GA pperte kit component, but the GA software kit must have been added to xCAT with the ``addkit`` command in order to resolve the license dependency. ::
addkitcomp -a -n -i rhels7.2-ppc64le-install-compute \
pperte-login-2.3.0.2-s002a-rhels-7.2-ppc64le
addkitcomp -a -n -i rhels7.2-ppc64le-install-compute \
@ -73,7 +73,7 @@ The following software kits will be used to install the IBM HPC software stack o
min-pperte-compute-2.3.0.2-s002a-rhels-7.2-ppc64le
#. Add the **ESSL** kitcomponents to the osimage.
The ESSL software kit has an *external dependency* on ``libxlf``, which is provided in the XLF software kit. Since it's already added in the above step, there is no action needed here.
@ -101,9 +101,9 @@ The following software kits will be used to install the IBM HPC software stack o
addkitcomp -a -i rhels7.2-ppc64le-install-compute \
essl-computenode-3264rtecuda-5.4.0-0-rhels-7.2-ppc64le
If the system doesn't have a GPU and the CUDA toolkit is not needed, the administrator should not add the following kit components that require the CUDA packages: ``essl-loginnode-5.4.0-0-rhels-7.2-ppc64le``, ``essl-computenode-3264rte-5.4.0-0-rhels-7.2-ppc64le`` and ``essl-computenode-3264rtecuda-5.4.0-0-rhels-7.2-ppc64le``. Check the ESSL installation guide: http://www.ibm.com/support/knowledgecenter/SSFHY8_5.4.0/com.ibm.cluster.essl.v5r4.essl300.doc/am5il_xcatinstall.htm
#. Add the **Parallel ESSL** kitcomponents to osimage.
**Note:** ESSL kitcomponents are required for the PESSL. ::
@ -115,7 +115,7 @@ The following software kits will be used to install the IBM HPC software stack o
addkitcomp -a -i rhels7.2-ppc64le-install-compute \
pessl-computenode-3264rtempich-5.2.0-0-rhels-7.2-ppc64le
#. Add the **PE DE** kitcomponents to osimage: ::
@ -124,11 +124,11 @@ The following software kits will be used to install the IBM HPC software stack o
addkitcomp -a -i rhels7.2-ppc64le-install-compute \
ppedev.compute-2.2.0-0-rhels-7.2-ppc64le
3. The updated osimage now contains the configuration to install using xCAT software kits: ::
lsdef -t osimage rhels7.2-ppc64le-install-compute
Object name: rhels7.2-ppc64le-install-compute
exlist=/install/osimages/rhels7.2-ppc64le-install-compute-kits/kits/KIT_COMPONENTS.exlist
imagetype=linux
@ -146,4 +146,4 @@ The following software kits will be used to install the IBM HPC software stack o
provmethod=install
template=/opt/xcat/share/xcat/install/rh/compute.rhels7.tmpl
4. The osimage is now ready to deploy to the compute nodes.
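A sketch of a typical deployment flow from here (adjust the noderange and the boot method to your hardware): ::
    # assign the osimage, set network boot, then power on/reset the nodes
    nodeset <noderange> osimage=rhels7.2-ppc64le-install-compute
    rsetboot <noderange> net
    rpower <noderange> boot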
View File
@ -1,13 +1,13 @@
IBM XL Compilers
================
IBM provides XL compilers with advanced optimizing on IBM Power Systems running Linux.
For more information, see http://www-03.ibm.com/software/products/en/xlcpp-linux
Partial Kits
------------
The IBM XL compilers are dependencies for some of the HPC software products and are **not** available in xCAT Software Kit format.
To assist customers in creating a software kit for the IBM XL compilers, xCAT provides partial kits at: https://xcat.org/files/kits/hpckits/
@ -46,7 +46,7 @@ To use software kits that require compiler kit components, a compiler software k
xlsmp.lib-3.1.0.8-151013.ppc64.rpm
xlsmp.msg.rte-3.1.0.8-151013.ppc64.rpm
xlsmp.rte-3.1.0.8-151013.ppc64.rpm
#. Obtain the corresponding compiler partial kit from https://xcat.org/files/kits/hpckits/. [#]_
**xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2** is downloaded to ``/tmp/kits``: ::
@ -56,11 +56,11 @@ To use software kits that require compiler kit components, a compiler software k
#. Complete the partial kit by running the ``buildkit addpkgs`` command: ::
buildkit addpkgs xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2 \
--pkgdir /tmp/kits/xlc-12.1.0.8
Sample output: ::
Extracting tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.NEED_PRODUCT_PKGS.tar.bz2. Please wait.
Spawning worker 0 with 5 pkgs
Spawning worker 1 with 5 pkgs
@ -73,7 +73,7 @@ To use software kits that require compiler kit components, a compiler software k
Generating sqlite DBs
Sqlite DBs complete
Creating tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.tar.bz2.
Kit tar file /tmp/kits/xlc-12.1.0.8-151013-ppc64.tar.bz2 successfully built.
View File
@ -10,4 +10,4 @@ Refer to the following pages for product specific details and known issues.
pe_rte.rst
pe_de.rst
essl.rst
pessl.rst
View File
@ -3,7 +3,7 @@ Parallel Environment Developer Edition (PE DE)
xCAT software kits for PE DE for Linux is available on: [#]_
* PE DE 1.2.0.1 and newer (SystemX)
* PE DE 1.2.0.3 and newer (SystemP)
View File
@ -7,7 +7,7 @@ xCAT software kits for PE RTE for Linux is available on: [#]_
PE RTE and ``mlnxofed_ib_install`` Conflict
-------------------------------------------
PPE requires the 32-bit version of ``libibverbs``. The default behavior of the ``mlnxofed_ib_install`` postscript used to install the Mellanox OFED Infiniband (IB) driver is to remove any of the old IB related packages when installing. To bypass this behavior, set the variable ``mlnxofed_options=--force`` when running the ``mlnxofed_ib_install`` script.
@ -18,8 +18,8 @@ Install Multiple Versions
Beginning with **PE RTE 1.2.0.10**, the packages are designed to allow for multiple versions of PE RTE to coexist on the same machine.
The default behavior of xCAT software kits is to only allow one version of a ``kitcomponent`` to be associated with an xCAT osimage.
When using ``addkitcomp`` to add a newer version of a kit component, xCAT will first remove the old version of the kit component before adding the new one.
To add multiple versions of PE RTE kit components to the same osimage, use the ``-n | --noupgrade`` option. For example, to add PE RTE 1.3.0.1 and PE RTE 1.3.0.2 to the ``compute`` osimage: ::
@ -36,11 +36,11 @@ When running parallel jobs, POE requires the user pass it a host list file. xCA
Known Issues
------------
* **[PE RTE 1.3.0.7]** - For developers creating the complete software kit. The src rpm is no longer required. It is recommended to create the new software kit for PE RTE 1.3.0.7 from scratch and not to use the older kits as a starting point.
* **[PE RTE 1.3.0.7]** - When upgrading ``ppe_rte_man`` in a diskless image, there may be errors reported during the genimage process. The new packages are actually upgraded, so the errors can be ignored with low risk.
* **[PE RTE 1.3.0.1 to 1.3.0.6]** - When uninstalling or upgrading ppe_rte_man in a diskless image, ``genimage <osimage>`` may fail and stop at an error. To work around this, simply rerun ``genimage <osimage>`` to finish the creation of the diskless image
View File
@ -1,9 +1,9 @@
Software Kits
=============
xCAT supports a unique software bundling concept called **software kits**. A software kit combines all of the required product components (packages, license, configuration, scripts, etc) to assist the administrator in the installation of software onto machines managed by xCAT. Software kits are made up of a collection of "kit components", each of which is tailored to one specific environment for that particular version of the software product.
Prebuilt software kits are available as a tar file which can be downloaded and then added to the xCAT installation. After the kits are added to xCAT, kit components are then added to specific xCAT osimages to automatically install the software bundled with the kit during OS deployment. In some instances, software kits may be provided as partial kits. Partial kits need additional effort to complete the kit before it can be used by xCAT.
Software kits are supported for both diskful and diskless image provisioning.
View File
@ -12,24 +12,24 @@ Backup Old xCAT Management Node
Backup xCAT management node data to backup server:
1.1 Backup xCAT important files and directories:
#. Get ``installdir`` from ``site`` table, backup ``installdir`` directory,
in this case, back up ``install`` directory: ::
lsdef -t site clustersite -i installdir
Object name: clustersite
installdir=/install
#. Backup these two xCAT directories: ::
~/.xcat
/etc/xcat
.. note:: Backing up ``~/.xcat`` is for all users who have xCAT client certs.
#. If there are customized files and directories for ``otherpkgdir``, ``pkgdir``, ``pkglist`` or ``template`` in some `osimage` definitions, back up these files and directories. For example: ::
lsdef -t osimage customized_rhels7.4-x86_64-install-compute -i otherpkgdir,pkgdir,pkglist,template
Object name: customized_rhels7.4-x86_64-install-compute
otherpkgdir=/<customized_dir>/post/otherpkgs/rhels7.4/x86_64
@ -87,10 +87,10 @@ Backup xCAT management node data to backup server:
/etc/ntp.conf
1.11 Backup database configuration files (optional):
* **[PostgreSQL]** ::
/var/lib/pgsql/data/pg_hba.conf
/var/lib/pgsql/data/postgresql.conf
@ -140,17 +140,17 @@ Restore xCAT management node
2.1 Power off old xCAT management server before configuring new xCAT management server
2.2 Configure new xCAT management server using the same ip and hostname as old xCAT management server. Configure the same additional network for hardware management network if needed, for example, bmc network or hmc network. xCAT management server setup refer to :doc:`Prepare the Management Node <../../guides/install-guides/yum/prepare_mgmt_node>`
2.3 Overwrite files/directories mentioned in above 1.2, 1.3, 1.4 from backup server to new xCAT management server
2.4 Download xcat-core and xcat-dep tar ball, then install xCAT in new xCAT management server, refer to :doc:`install xCAT <../../guides/install-guides/yum/install>`
2.5 Use ``rpm -qa|grep -i xCAT`` to list all xCAT RPMs in new xCAT management node, compare these RPMs base name with those in ``xcat_rpm_names`` from above 1.15. If some RPMs are missing, use ``yum install <rpm_package_basename>`` to install missing RPMs.
2.6 If using ``MySQL``/``MariaDB``/``PostgreSQL``, migrate xCAT to use ``MySQL/MariaDB/PostgreSQL``, refer to :doc:`Configure a Database <../hierarchy/databases/index>`
2.7 To restore the xCAT database
a. Restore xCAT database from the ``/dbbackup/db`` directory without ``auditlog`` and ``eventlog``, enter: ::
restorexCATdb -p /dbbackup/db
@ -160,7 +160,7 @@ Restore xCAT management node
restorexCATdb -a -p /dbbackup/db
c. (optional) Overwrite files in above 1.11, restart ``PostgreSQL``: ::
service postgresql restart
2.8 Overwrite remaining files/directories mentioned in above 1.1, 1.5, 1.6, 1.7, 1.8, 1.9, 1.10, 1.12; If needed, check if files exist based on above 1.13 and 1.16.
View File
@ -3,11 +3,11 @@ Building Stateless/Diskless Images
A **stateless**, or **diskless**, provisioned node is one where the operating system image is deployed and loaded into memory. The Operating System (OS) does not store its files directly onto persistent storage (i.e. hard disk drive, shared drive, USB, etc.), so subsequent rebooting of the machine results in loss of any state changes that happened while the machine was running.
To deploy stateless compute nodes, you must first create a stateless image. The "netboot" osimages created from ``copycds`` in the **osimage** table are sample osimage definitions that can be used for deploying stateless nodes.
In a homogeneous cluster, the management node is the same hardware architecture and running the same Operating System (OS) as the compute nodes, so ``genimage`` can directly be executed from the management node.
The issue arises in a heterogeneous cluster, where the management node is running a different operating system level *or* hardware architecture than the compute nodes for which the image is being built. The ``genimage`` command that builds stateless images depends on various utilities provided by the base operating system and needs to be run on a node with the same hardware architecture and *major* Operating System release as the nodes that will be booted from the image.
Same Operating System, Different Architecture
---------------------------------------------
@ -19,9 +19,9 @@ The following describes creating stateless images of the same Operating System,
#. On xCAT management node, ``xcatmn``, select the osimage you want to create from the list of osimage definitions. To list out the osimage definitions: ::
lsdef -t osimage
#. **optional:** Create a copy of the osimage definition that you want to modify.
To take the sample ``rhels6.3-x86_64-netboot-compute`` osimage definition and create a copy called ``mycomputeimage``, run the following command: ::
@ -38,15 +38,15 @@ The following describes creating stateless images of the same Operating System,
./genimage -a x86_64 -o rhels6.3 -p compute --permission 755 --srcdir /install/rhels6.3/x86_64 --pkglist \
/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.pkglist --otherpkgdir /install/post/otherpkgs/rhels6.3/x86_64 --postinstall \
/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall --rootimgdir /install/netboot/rhels6.3/x86_64/compute mycomputeimage
#. Go to the target node, ``n01`` and run the following:
#. mount the ``/install`` directory from the xCAT Management Node: ::
mkdir /install
mount -o soft xcatmn:/install /install
#. Copy the executable files from the ``/opt/xcat/share/xcat/netboot`` from the xCAT Management node to the target node: ::
mkdir -p /opt/xcat/share/xcat/
@ -61,7 +61,7 @@ The following describes creating stateless images of the same Operating System,
**If there are problems creating the stateless image, provide a local directory for --rootimgdir:** ::
mkdir -p /tmp/compute
Rerun ``genimage``, replacing ``--rootimgdir /tmp/compute``: ::
@ -70,8 +70,8 @@ The following describes creating stateless images of the same Operating System,
./genimage -a x86_64 -o rhels6.3 -p compute --permission 755 --srcdir /install/rhels6.3/x86_64 --pkglist \
/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.pkglist --otherpkgdir /install/post/otherpkgs/rhels6.3/x86_64 --postinstall \
/opt/xcat/share/xcat/netboot/rh/compute.rhels6.x86_64.postinstall --rootimgdir /tmp/compute mycomputeimage
Then copy the contents from ``/tmp/compute`` to ``/install/netboot/rhels6.3/compute``
#. Now return to the management node and execute ``packimage`` on the osimage and continue provisioning the node ::
View File
@ -8,8 +8,8 @@ Troubleshooting
Error: Unable to find pxelinux.0 at /opt/xcat/share/xcat/netboot/syslinux/pxelinux.0
**Resolution:**
The syslinux network booting files are missing.
Install the ``syslinux-xcat`` package provided in the xcat-dep repository: ``yum -y install syslinux-xcat``
View File
@ -4,4 +4,4 @@ x86 Management Node
.. toctree::
:maxdepth: 2
View File
@ -9,9 +9,9 @@ It is recommended that lldp protocol in the switches is enabled to collect the s
xCAT will use the ethernet switches during node discovery to find out which switch port a particular MAC address is communicating over. This allows xCAT to match a random booting node with the proper node name in the database. To set up a switch, give it an IP address on its management port and enable basic **SNMP** functionality. (Typically, the **SNMP** agent in the switches is disabled by default.) The easiest method is to configure the switches to give the **SNMP** version 1 community string called "public" read access. This will allow xCAT to communicate to the switches without further customization. (xCAT will get the list of switches from the **switch** table.) If you want to use **SNMP** version 3 (e.g. for better security), see the example below. With **SNMP** V3 you also have to set the user/password and AuthProto (default is **md5**) in the switches table.
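As a sketch of the xCAT side, the SNMP credentials can be stored in the **switches** table; the column names below are assumptions, so verify them with ``tabdump switches`` before use: ::
    # column names are assumptions; verify with: tabdump switches
    chtab switch=switch1 switches.snmpversion=3 switches.username=xcatadmin switches.password=xcatpassword switches.auth=sha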
If for some reason you can't configure **SNMP** on your switches, you can use sequential discovery or the more manual method of entering the nodes' MACs into the database.
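As a rough sketch of the manual route (node name and MAC are placeholders), the MAC can be entered with ``chdef`` and the DHCP lease refreshed: ::

chdef n01 mac=aa:bb:cc:dd:ee:01
makedhcp n01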
**SNMP** V3 Configuration example:
xCAT supports many switch types, such as **BNT** and **Cisco**. Here is an example of configuring **SNMP V3** on the **Cisco** switch 3750/3650:
@ -62,14 +62,14 @@ Switch Management
When managing Ethernet switches, the admin often logs into the switches one by one using SSH or Telnet and runs the switch commands. However, it becomes time consuming when there are a lot of switches in a cluster. In a very large cluster, the switches are often identical and the configurations are identical. It helps to configure and monitor them in parallel from a single command.
For managing Mellanox IB switches and Qlogic IB switches, see :doc:`Mellanox IB switches and Qlogic IB switches </advanced/networks/infiniband/index>`
xCAT will not do a lot of switch management functions. Instead, it will configure the switch so that the admin can run remote commands such as ``xdsh`` against it. Thus, the admin can use ``xdsh`` to run proprietary switch commands remotely from the xCAT MN to enable **VLAN**, **bonding**, **SNMP** and others.
Running Remote Commands in Parallel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can use xdsh to run parallel commands on Ethernet switches. The following shows how to configure xCAT to run xdsh on the switches:
.. note:: For this to work, configure the switch to allow **ssh** or **telnet**. The procedure varies from switch to switch; consult the reference guides for your switch to find out how to do this.
@ -82,7 +82,7 @@ Set the ssh or telnet username and password. ::
chdef bntc125 username=admin \
password=password \
protocol=ssh
or
chdef bntc125 username=admin \
password=password \
protocol=telnet
@ -101,8 +101,8 @@ Set the ssh or telnet username and password. ::
Also note that --devicetype is used here. xCAT supports the following switch types out of the box: ::
* BNT
* Cisco
* Juniper
* Mellanox (for IB and Ethernet switches)
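For example, a parallel command against the BNT switch defined above might be issued as follows (the ``EthSwitch::BNT`` device-type string and the command list are illustrative; adjust them for your hardware): ::

xdsh bntc125 --devicetype EthSwitch::BNT "enable;configure terminal;vlan 3;end;show vlan"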
@ -121,7 +121,7 @@ Here is what result will look like: ::
bntc125: end
bntc125: show vlan
bntc125: VLAN Name Status Ports
bntc125: ---- -------------------------------- ------ ------------------------
bntc125: 1 Default VLAN ena 45-XGE4
bntc125: 3 VLAN 3 dis empty
bntc125: 101 xcatpriv101 ena 24-44

@ -39,4 +39,4 @@ Below is the information ``getadapter`` tries to inspect:
* **candidatename**: All the names which satisfy the predictable network device naming scheme; if customers need to customize their network adapter names, they can choose one of these. (``confignetwork`` needs more work to support this; if customers want to use their own names, xCAT should offer an interface to collect the customer's input and change this column.)
* **linkstate**: The link state of the network device
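A minimal invocation that gathers this information (node name is a placeholder) looks like: ::

getadapter n01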

@ -21,9 +21,9 @@ Burn new firmware on each ibaX: ::
mstflint -d 0002:01:00.0 -i <image location> b
.. note:: If this is a PureFlex MezzanineP adapter, you must select the correct image for each ibaX device.
The difference between the firmware images is at the end of the file name:
* _0.bin (iba0/iba2)
* _1.bin (iba1/iba3)
@ -96,7 +96,7 @@ Save the changes made for new IB image: ::
configuration write
Activate the new IB image (reboot switch): ::
reload

@ -1,11 +1,11 @@
Configuration
=============
The process to configure the osimage to install the Mellanox OFED Drivers for Diskful and Diskless scenarios is outlined below.
.. toctree::
:maxdepth: 2
mlnxofed_ib_install_v2_diskful.rst
mlnxofed_ib_install_v2_diskless.rst

@ -1,7 +1,7 @@
Mellanox OFED Installation Script
=================================
Mellanox provides a tested and packaged version of the OpenFabrics Enterprise Distribution (OFED) driver, named Mellanox OFED (MLNX_OFED). To assist with the installation of the MLNX_OFED driver, xCAT provides a sample postscript: ``mlnxofed_ib_install.v2``.
.. toctree::
:maxdepth: 2
@ -10,4 +10,4 @@ Mellanox provides a tested and packaged version of the OpenFabrics Enterprise Di
mlnxofed_configuration.rst
mlnxofed_ib_verified_scenario_matrix.rst
mlnxofed_ib_known_issue.rst

@ -11,16 +11,16 @@ Diskful Installation
#. Configure the ``mlnxofed_ib_install`` script to install the MNLX_OFED drivers
xCAT has a concept of postscripts that can be used to customize the node after the operating system is installed.
Mellanox recommends that the operating system is rebooted after the drivers are installed, so xCAT recommends using the ``postscripts`` attribute to avoid the need for a second reboot. To invoke the ``mlnxofed_ib_install`` as a postscript ::
chdef -t node -o <node_name> \
-p postscripts="mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso>"
**[kernel mismatch issue]** The Mellanox OFED ISO is built against a series of specific kernel versions. If the version of the Linux kernel does not match any of the Mellanox offered pre-built kernel modules, you can pass the ``--add-kernel-support --force`` argument to the Mellanox installation script to build the kernel modules based on the version you are using. ::
chdef -t node -o <node_name> \
-p postscripts="mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso> \
-m --add-kernel-support --force -end-"
@ -37,8 +37,8 @@ Diskful Installation
service openibd status
systemd: ::
systemctl status openibd.service
* Verify that the Mellanox IB drivers are located at: ``/lib/modules/<kernel_version>/extra/``
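One quick way to confirm this from the management node (the ``grep`` pattern is only an illustration): ::

xdsh <node_name> 'ls /lib/modules/$(uname -r)/extra/ | grep -i mlx'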

@ -1,7 +1,7 @@
Diskless Installation
=====================
#. Prepare dependency packages in the pkglist
In order for the Mellanox installation script to execute successfully, certain dependency packages are required to be installed on the compute node. xCAT provides sample package list files to help resolve these dependencies. The samples are located at ``/opt/xcat/share/xcat/ib/netboot/<os>/``.
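One way to pull these in (sketched here; the exact sample file name under that directory varies by distro and architecture) is to reference it from the image's pkglist with an ``#INCLUDE`` directive: ::

#INCLUDE:/opt/xcat/share/xcat/ib/netboot/rh/ib.rhels7.ppc64le.pkglist#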
@ -11,36 +11,36 @@ Diskless Installation
#. Configure the ``mlnxofed_ib_install`` script to install the MNLX_OFED drivers
Edit the ``postinstall`` script on the osimage to invoke the ``mlnxofed_ib_install`` install script.
For example, take ``rhels7.2-ppc64le-netboot-compute``:
#. Find the path to the ``postinstall`` script: ::
# lsdef -t osimage -o rhels7.2-ppc64le-netboot-compute -i postinstall
Object name: rhels7.2-ppc64le-netboot-compute
postinstall=/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall
#. Edit the ``/opt/xcat/share/xcat/netboot/rh/compute.rhels7.ppc64le.postinstall`` and add the following: ::
/install/postscripts/mlnxofed_ib_install \
-p /install/<path-to>/<MLNX_OFED_LINUX.iso> -i $1 -n genimage
.. note:: The ``$1`` is an argument that is passed to the postinstall script at runtime.
.. tip:: **Kernel Mismatch**
The Mellanox OFED ISO is built against a series of specific kernel versions. If the version of the Linux kernel being used does not match any of the pre-built kernels, pass ``--add-kernel-support --without-32bit --without-fw-update --force`` to the Mellanox installation script to build the kernel modules based on the kernel you are using. Example: ::
/install/postscripts/mlnxofed_ib_install \
-p /install/<path-to>/<MLNX_OFED_LINUX.iso> -m --add-kernel-support --without-32bit --without-fw-update --force -end- \
-i $1 -n genimage
#. Generate the diskless image
Use the ``genimage`` command to generate the diskless image from the osimage definition ::
genimage <osimage>
Use the ``packimage`` command to pack the diskless image for deployment ::
@ -50,7 +50,7 @@ Diskless Installation
#. Provision the node ::
rinstall <node> osimage=rhels7.2-ppc64le-netboot-compute
#. Verification
* Check the status of ``openibd`` service
@ -60,8 +60,8 @@ Diskless Installation
service openibd status
systemd: ::
systemctl status openibd.service
* Verify that the Mellanox IB drivers are located at: ``/lib/modules/<kernel_version>/extra/``

@ -4,7 +4,7 @@ Preparation
Download MLNX_OFED ISO
----------------------
**xCAT only supports installation using the ISO format.**
Download the Mellanox OFED ISO file `here (MLNX_OFED) <http://www.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers>`_.
@ -22,7 +22,7 @@ The ``mlnxofed_ib_install.v2`` is a sample script intended to assist with the in
# ensure the script has execute permission
chmod +x /install/postscripts/mlnxofed_ib_install
#. Familiarize yourself with the options available for the xCAT ``mlnxofed_ib_install`` script.
+---------+------------------+----------------------------------------------------------+
| Option | Required | Description |
@ -55,5 +55,5 @@ The ``mlnxofed_ib_install.v2`` is a sample script intended to assist with the in
To pass the ``--add-kernel-support`` option to ``mlnxofedinstall``, use the following command: ::
/install/postscripts/mlnxofed_ib_install -p /install/<path-to>/<MLNX_OFED_LINUX.iso> \
-m --without-32bit --without-fw-update --add-kernel-support --force -end-

@ -4,16 +4,16 @@ Known Issues
Preventing upgrade of the Mellanox Drivers
------------------------------------------
On RedHat operating systems, after the Mellanox drivers are installed, you may have a requirement to update your operating system to a later version.
Some operating systems may ship InfiniBand drivers that are a higher version than the Mellanox drivers you have installed and may therefore update the existing drivers.
To prevent this from happening, add the following in the ``/etc/yum.conf`` ::
exclude=dapl* libib* ibacm infiniband* libmlx* librdma* opensm* ibutils*
Development packages in SLES
----------------------------
If using the ``--add-kernel-support`` attribute on SLES operating systems, you may find problems with installing some dependency packages which are not shipped by the SLES server DVDs. The development rpms are provided by the SDK DVDs. Refer to :doc:`Add Additional Software Packages </guides/admin-guides/manage_clusters/ppc64le/diskful/customize_image/additional_pkg>` to configure the SDK repositories.

@ -1,7 +1,7 @@
MLNX_OFED Support Matrix
========================
The following ISO images and attributes have been verified by the xCAT Team.
**RedHat Enterprise Linux**

@ -11,7 +11,7 @@ If your target Mellanox IB adapter has 2 ports, and you plan to give port ib0 4
1. Define your networks in networks table ::
chdef -t network -o ib0ipv41 net=20.0.0.0 mask=255.255.255.0 mgtifname=ib0
chdef -t network -o ib0ipv42 net=30.0.0.0 mask=255.255.255.0 mgtifname=ib0
chdef -t network -o ib0ipv61 net=1:2::/64 mask=/64 mgtifname=ib0 gateway=1:2::2
chdef -t network -o ib0ipv62 net=2:2::/64 mask=/64 mgtifname=ib0 gateway=

@ -18,7 +18,7 @@ Add the login user name and password to the switches table: ::
The switches table will look like this: ::
#switch,...,sshusername,sshpassword,switchtype,....
"mswitch",,,,,,,"admin","admin","MellanoxIB",,
If there is only one admin and one password for all the switches then put the entry in the xCAT passwd table for the admin id and password to use to login. ::
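# a sketch; "switch" is the key conventionally used for switch credentials in the passwd table
chtab key=switch passwd.username=admin passwd.password=admin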

@ -75,4 +75,4 @@ If it is not running, then run the following commands: ::
monadd snmpmon
monstart snmpmon

@ -1,13 +1,13 @@
Open Network Install Environment Switches
=========================================
The Open Network Install Environment, or "ONIE" [1]_, is an open source project defining an **install environment** for bare metal switches. This environment gives end users a choice of which network operating system to install onto these bare metal switches.
.. toctree::
:maxdepth: 2
os_cumulus/index.rst
os_cumulus/ztp.rst
.. [1] Open Network Install Environment: Created by Cumulus Networks, Inc. in 2012, the Open Network Install Environment (ONIE) Project is a small operating system, pre-installed as firmware on bare metal network switches, that provides an environment for automated operating system provisioning.

@ -10,18 +10,18 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI
#. Create a pre-defined switch definition for the ONIE switch using the ``onieswitch`` template.
The mac address of the switch management port is required for xCAT to configure the DHCP information and send over the OS to install on the switch.
**Small Clusters**
If you know the mac address of the management port on the switch, create the pre-defined switch definition providing the mac address. ::
mkdef frame01sw1 --template onieswitch arch=armv71 \
ip=192.168.1.1 mac="aa:bb:cc:dd:ee:ff"
**Large Clusters**
xCAT's :doc:`switchdiscover </guides/admin-guides/references/man1/switchdiscover.1>` command can be used to discover the mac address and fill in the predefined switch definitions based on the switch/switchport mapping.
#. Define all the switch objects providing the switch/switchport mapping: ::
@ -34,8 +34,8 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI
ip=192.168.3.1 switch=coresw1 switchport=3
mkdef frame04sw1 --template onieswitch arch=armv71 \
ip=192.168.4.1 switch=coresw1 switchport=4
...
#. Leverage ``switchdiscover`` over the DHCP range to automatically detect the MAC address and write them into the predefined switches above. ::
switchdiscover --range <IP range>
@ -43,9 +43,9 @@ xCAT provides support for detecting and installing the Cumulus Linux OS into ONI
#. Run the ``nodeset`` command to set the ``provmethod`` attribute of the target switch(es) to the Cumulus Linux install image and prepare the DHCP/BOOTP lease information for the switch: ::
# nodeset frame01sw1 osimage=cumulus3.5.2-armel
# lsdef frame01sw1
Object name: frame01sw1
arch=armv7l
groups=switch,edge_switch
ip=172.21.208.03
@ -82,11 +82,11 @@ Configure xCAT Remote Commands
After Cumulus Linux OS is installed, a default user ``cumulus`` will be created with default password: ``CumulusLinux!``.
To ease in the management of the switch, xCAT provides a script to help configure password-less ssh as the ``root`` user. This script sends over the xCAT ssh keys so that the xCAT remote commands (``xdsh``, ``xdcp``, etc) can be run against the ONIE switches.
Execute the following to sync the xCAT keys to the switch: ::
/opt/xcat/share/xcat/scripts/configonie --switches frame01sw1 --ssh
Validate the ssh keys are correctly configured by running an ``xdsh`` command: ::
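# a minimal check; any command that completes without a password prompt confirms the keys
xdsh frame01sw1 uptime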
@ -98,7 +98,7 @@ Validate the ssh keys are correctly configured by running a ``xdsh`` command: ::
Activate the License
--------------------
After Cumulus Linux OS is installed onto the ONIE switch, only the serial port console and the management ethernet port are enabled. To activate the rest of the switch ports, the license file needs to be installed onto the switch.
#. Copy the license file to the switch: ::
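# illustrative; the license file was staged under /install/custom/sw_os/cumulus/ in the Preparation section
xdcp frame01sw1 /install/custom/sw_os/cumulus/licensefile.txt /root/licensefile.txt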
@ -159,6 +159,6 @@ To verify the SNMPv3 configuration, run ``xcatprobe switch_macmap`` command, wil
...........................more output.....................

@ -4,9 +4,9 @@ Switch Management
Sync File support
------------------
xCAT supports synchronizing configuration files to Cumulus switches.
#. Use the instructions in :ref:`The_synclist_file` to set up the syncfile.
#. Add syncfile to cumulus osimage. ::
# chdef -t osimage cumulus3.5.2-armel synclists=/tmp/synclists
@ -24,12 +24,12 @@ Switch Port and VLAN Configuration
xCAT places the front-panel port configuration in ``/etc/network/interfaces.d/xCAT.intf``.
The ``configinterface`` postscript can be used to pull switch interface configuration from the xCAT Management Node (MN) to the switch. Place the switch specific configuration files in the following directory on the MN: ``/install/custom/sw_os/cumulus/interface/``.
xCAT will look for files in the above directory in the following order:
1. file name that matches the switch hostname
2. file name that matches the switch group name
3. file name that has the word 'default'
.. note:: If the postscript cannot find a configuration file on the MN, it will set all ports on the switch to be part of VLAN 1.
@ -42,7 +42,7 @@ Execute the script using the following command: ::
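# a sketch; assumes the postscript is pushed from the management node with updatenode
updatenode frame01sw1 -P configinterface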
Re-install OS
-------------
There may be occasions where a re-install of the Cumulus Linux OS is required. The following commands can be used to invoke the install:
.. important:: This assumes that the Cumulus Linux files are on the xCAT MN in the correct place.
@ -50,11 +50,11 @@ There may be occasions where a re-install of the Cumulus Linux OS is required.
# to clear out all the previous configuration, use the -k option (optional)
xdsh <switch> "/usr/cumulus/bin/onie-select -k
# to invoke the reinstall of the OS
xdsh <switch> "/usr/cumulus/bin/onie-select -i -f;reboot"
* **Manually**, log into the switch and run the following commands: ::
sudo onie-select -i
sudo reboot

@ -1,7 +1,7 @@
Preparation
===========
Prepare the Cumulus Linux files on the xCAT Management Node.
#. Obtain a valid Cumulus Linux License and download the Cumulus Linux OS installer.
@ -10,18 +10,18 @@ Prepare the Cumulus Linux files on the xCAT Management Node.
# Create a directory to hold the cumulus linux files
mkdir -p /install/custom/sw_os/cumulus/
# copy the license file
cp licensefile.txt /install/custom/sw_os/cumulus/
# copy the installer
cp cumulus-linux-3.1.0-bcm-armel.bin /install/custom/sw_os/cumulus/
Cumulus osimage
---------------
xCAT can create a Cumulus osimage definition via the ``copycds`` command. ``copycds`` will copy the Cumulus installer to a destination directory and create several relevant osimage definitions. **cumulus<release>-<arch>** is the default osimage name. ::
#run copycds command
# copycds cumulus-linux-3.5.2-bcm-armel.bin
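#list the osimage definitions copycds created (illustrative check)
# lsdef -t osimage | grep -i cumulus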
