diff --git a/.gitignore b/.gitignore
index 893951706..4767b51c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,6 @@
docs/build
# Ignore the temporary file for creating the Release version
Release.save*
+# ignore the old generated man pages and html pages
+xCAT-client/share/man
+xCAT-client/share/doc
diff --git a/README.rst b/README.rst
index 883a8aeb8..2cb961dfd 100644
--- a/README.rst
+++ b/README.rst
@@ -8,7 +8,7 @@ Documentation
xCAT documentation is available at: http://xcat-docs.readthedocs.io/en/latest/
-|docs_latest| |docs_2131| |docs_2130| |docs_212| |docs_211|
+|docs_latest| |docs_2133| |docs_2132| |docs_2131| |docs_2130| |docs_212| |docs_211|
Open Source License
-------------------
@@ -22,6 +22,16 @@ Developers
Developers and prospective contributors are encouraged to read the `Developers Guide `_
In particular the `GitHub `_ related subsection.
+.. |docs_2133| image:: https://readthedocs.org/projects/xcat-docs/badge/?version=2.13.3
+ :alt: 2.13.3 documentation status
+ :scale: 100%
+ :target: http://xcat-docs.readthedocs.io/en/2.13.3/
+
+.. |docs_2132| image:: https://readthedocs.org/projects/xcat-docs/badge/?version=2.13.2
+ :alt: 2.13.2 documentation status
+ :scale: 100%
+ :target: http://xcat-docs.readthedocs.io/en/2.13.2/
+
.. |docs_2131| image:: https://readthedocs.org/projects/xcat-docs/badge/?version=2.13.1
:alt: 2.13.1 documentation status
:scale: 100%
diff --git a/Version b/Version
index b24afe26d..cb9778b41 100644
--- a/Version
+++ b/Version
@@ -1 +1 @@
-2.13.2.1
+2.13.4
\ No newline at end of file
diff --git a/build-ubunturepo b/build-ubunturepo
index 5c078b0dd..49e46cf63 100755
--- a/build-ubunturepo
+++ b/build-ubunturepo
@@ -6,10 +6,10 @@
#
#
# Getting Started:
-# - Clone the xcat-core git repository int a directory called /src/xcat-core, where
-# is the same name as the release dir it is uploaded to xcat.org (e.g devel, 2.9, 2.10)
+# - Clone the xcat-core git repository under a directory named "xcat-core/src"
# - make sure reprepro is installed on the build machine
-# - Run this script from the local git repository you just created. It will create the other directories that are needed.
+# - Run this script from the local git repository you just created.
+# ./build-ubunturepo -c BUILDALL=1
# Usage: attr=value attr=value ... ./build-ubunturepo { -c | -d }
# PROMOTE=1 - if the attribute "PROMOTE" is specified, means an official dot release. This does not
@@ -22,7 +22,6 @@
# When you are ready to release this build, use PROMOTE=1 without PREGA
# BUILDALL=1 - build all rpms, whether they changed or not. Should be used for snap builds that are in
# prep for a release.
-# UP=0 or UP=1 - override the default upload behavior
# LOG= - provide an LOG file option to redirect some output into log file
#
# For the dependency packages 1. All the xcat dependency deb packages should be uploaded to
@@ -96,13 +95,6 @@ if [ -z "$c_flag" -a -z "$d_flag" ];then
exit 2
fi
-USER="xcat"
-SERVER="xcat.org"
-FILES_PATH="files"
-FRS="/var/www/${SERVER}/${FILES_PATH}"
-APT_DIR="${FRS}/xcat"
-APT_REPO_DIR="${APT_DIR}/repos/apt"
-
if [ "$c_flag" -a "$d_flag" ];then
printusage
exit 2
@@ -118,7 +110,7 @@ curdir=`pwd`
local_core_repo_path="$curdir/../../xcat-core"
local_dep_repo_path="$curdir/../../xcat-dep/xcat-dep"
-#use flock to only one person build at the same time
+# Use flock so that only one build can run at a time
# Get a lock, so can not do 2 builds at once
exec 8>/var/lock/xcatbld.lock
if ! flock -n 8; then
@@ -146,51 +138,46 @@ done
if [ "$c_flag" ]
then
- # strip the /src/xcat-core from the end of the dir to get the next dir up and use as the release
+ #
+ # The format of the directory for Ubuntu builds needs to be "xcat-core/src/xcat-core" so
+ # that the output build files are created under "xcat-core".
+ # TODO: This should be fixed in the future....
+ #
if [ -z "$REL" ]; then
t=${curdir%/src/xcat-core}
REL=`basename $t`
fi
+ if [ "$REL" != "xcat-core" ]; then
+        echo "ERROR: REL='$REL' needs to be 'xcat-core'. Ensure the path is 'xcat-core/src/xcat-core'"
+ exit 1
+ fi
- ver=`cat Version`
if [ "$PROMOTE" != 1 ]; then
code_change=0
update_log=''
- #get the version
- if [ "$REL" = "xcat-core" ];then
- git_flag=1
- REL=`git rev-parse --abbrev-ref HEAD`
- if [ "$REL" = "master" ]; then
- REL="devel"
- fi
- if [ -z "$GITUP" ];then
- update_log=../coregitup
- echo "git pull > $update_log"
- git pull > $update_log
- else
- update_log=$GITUP
- fi
-
- if ! grep -q 'Already up-to-date' $update_log; then
- code_change=1
- fi
+ # get the version
+ git_flag=1
+ REL=`git rev-parse --abbrev-ref HEAD`
+ if [ "$REL" = "master" ]; then
+ REL="devel"
+ fi
+ if [ -z "$GITUP" ];then
+ update_log=../coregitup
+ echo "git pull > $update_log"
+ git pull > $update_log
else
- git_flag=0
- if [ -z "$SVNUP" ]; then
- update_log=../coresvnup
- echo "svn up > $update_log"
- svn up > $update_log
- else
- update_log=$SVNUP
- fi
+ update_log=$GITUP
+ fi
- if ! grep -q 'At revision' $update_log;then
- code_change=1
- fi
+ if ! grep -q 'Already up-to-date' $update_log; then
+ code_change=1
fi
ver=`cat Version`
short_ver=`cat Version|cut -d. -f 1,2`
short_short_ver=`cat Version|cut -d. -f 1`
+ build_time=`date`
+ build_machine=`hostname`
+ commit_id=`git rev-parse --short HEAD`
package_dir_name=debs$REL
#TODO: define the core path and tarball name
@@ -206,11 +193,10 @@ then
echo "###############################"
#the package type: local | snap | alpha
- #the build introduce stirng
- pkg_type="snap"
+    #the build description string
build_string="Snap_Build"
- cur_date=`date +%Y%m%d%H%M`
- pkg_version="${ver}-${pkg_type}${cur_date}"
+ xcat_release="snap$(date '+%Y%m%d%H%M')"
+ pkg_version="${ver}-${xcat_release}"
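+    # For example, with Version "2.13.4" this produces a pkg_version like
+    # "2.13.4-snap201704121530" (timestamp shown is illustrative).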
if [ ! -d ../../$package_dir_name ];then
mkdir -p "../../$package_dir_name"
@@ -229,8 +215,6 @@ then
do
if grep -q $file $update_log || [ "$BUILDALL" == 1 -o "$file" = "perl-xCAT" ]; then
rm -f ../../$package_dir_name/${file_low}_*.$target_arch.deb
- #genesis scripts package, don't remove genesis amd64 files
- #rm -f ../../$package_dir_name/${file_low}-amd64_*.deb
cd $file
CURDIR=$(pwd)
dch -v $pkg_version -b -c debian/changelog $build_string
@@ -377,6 +361,16 @@ __EOF__
chmod 775 mklocalrepo.sh
+ #
+ # Add a buildinfo file under xcat-core to track information about the build
+ #
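+    # A resulting buildinfo file might look like this (values illustrative):
+    #   VERSION=2.13.4
+    #   RELEASE=snap201704121530
+    #   BUILD_TIME=Wed Apr 12 15:30:00 EDT 2017
+    #   BUILD_MACHINE=builder01
+    #   COMMIT_ID=1a2b3c4
+    #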
+ buildinfo=$local_core_repo_path/buildinfo
+ echo "VERSION=$ver" > $buildinfo
+ echo "RELEASE=$xcat_release" >> $buildinfo
+ echo "BUILD_TIME=$build_time" >> $buildinfo
+ echo "BUILD_MACHINE=$build_machine" >> $buildinfo
+ echo "COMMIT_ID=$commit_id" >> $buildinfo
+
#create the xcat-core.list file
cd ../
@@ -501,7 +495,15 @@ __EOF__
chgrp root $dep_tar_name
chmod g+w $dep_tar_name
- # Decide whether to upload or not (default NOT to upload)
+
+ USER="xcat"
+ SERVER="xcat.org"
+ FILES_PATH="files"
+ FRS="/var/www/${SERVER}/${FILES_PATH}"
+ APT_DIR="${FRS}/xcat"
+ APT_REPO_DIR="${APT_DIR}/repos/apt"
+
+    # Decide whether to upload the xcat-dep package or not (default is NOT to upload xcat-dep)
if [ "$UP" != "1" ]; then
echo "Upload not specified, Done! (rerun with UP=1, to upload)"
cd $old_pwd
@@ -527,7 +529,7 @@ __EOF__
while [ $((i+=1)) -le 5 ] && ! rsync -v --force README $USER@${SERVER}:${APT_DIR}/xcat-dep/2.x_Ubuntu/
do : ; done
- cd $old_pwd
- exit 0
fi
+
+cd $old_pwd
exit 0
diff --git a/docs/source/advanced/cluster_maintenance/compute_node/index.rst b/docs/source/advanced/cluster_maintenance/compute_node/index.rst
index 6199699e6..87bc1c733 100644
--- a/docs/source/advanced/cluster_maintenance/compute_node/index.rst
+++ b/docs/source/advanced/cluster_maintenance/compute_node/index.rst
@@ -5,3 +5,4 @@ Compute Node
:maxdepth: 2
changing_hostname_ip.rst
+ replace/index.rst
diff --git a/docs/source/advanced/cluster_maintenance/compute_node/replace/index.rst b/docs/source/advanced/cluster_maintenance/compute_node/replace/index.rst
new file mode 100644
index 000000000..7346fb246
--- /dev/null
+++ b/docs/source/advanced/cluster_maintenance/compute_node/replace/index.rst
@@ -0,0 +1,7 @@
+Replacing Nodes
+===============
+
+.. toctree::
+ :maxdepth: 2
+
+ openpower.rst
diff --git a/docs/source/advanced/cluster_maintenance/compute_node/replace/openpower.rst b/docs/source/advanced/cluster_maintenance/compute_node/replace/openpower.rst
new file mode 100644
index 000000000..6e17f9947
--- /dev/null
+++ b/docs/source/advanced/cluster_maintenance/compute_node/replace/openpower.rst
@@ -0,0 +1,38 @@
+OpenPower Nodes
+===============
+
+
+When compute nodes are physically replaced in the frame, leverage xCAT to re-discover the compute nodes. The following guide can be used for:
+
+ * IBM OpenPower S822LC for HPC
+
+
+#. Identify the machine(s) to be replaced: ``frame10cn02``.
+
+#. [**Optional**] It's recommended to set the BMC IP address back to DHCP, if it was set to STATIC. ::
+
+ rspconfig frame10cn02 ip=dhcp
+
+#. Set the outgoing machine to ``offline`` and remove its attributes: ::
+
+ nodeset frame10cn02 offline
+ chdef frame10cn02 mac=""
+
+#. If using **MTMS**-based discovery, fill in the Model-Type and Serial Number for the machine: ::
+
+ chdef frame10cn02 mtm=8335-GTB serial=
+
+#. If using **SWITCH**-based discovery, go on to the next step. The ``switch`` and ``switch-port`` should already be set in the compute node definition.
+
+ Node attributes will be replaced during the discovery process (mtm, serial, mac, etc.)
+
+#. Search for the new BMC in the open range: ::
+
+ bmcdiscover --range -w -z
+
+#. When the BMC is found, start the discovery with the following commands: ::
+
+ rsetboot /node-8335.* net
+ rpower /node-8335.* boot
+
+
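+Once the node has been rediscovered and provisioned, the refreshed attributes can be checked; ``frame10cn02`` is the example node from above and the attribute list is only a suggestion: ::
+
+    lsdef frame10cn02 -i mtm,serial,mac,status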
diff --git a/docs/source/advanced/cluster_maintenance/mgmt_node/changing_hostname_ip.rst b/docs/source/advanced/cluster_maintenance/mgmt_node/changing_hostname_ip.rst
index fda96e1ad..72282f3fb 100644
--- a/docs/source/advanced/cluster_maintenance/mgmt_node/changing_hostname_ip.rst
+++ b/docs/source/advanced/cluster_maintenance/mgmt_node/changing_hostname_ip.rst
@@ -52,7 +52,7 @@ Change the Management Hostname
hostname
-* Edit hostname configuration file
+* Update the hostname configuration files:
| Add hostname in ``/etc/hostname``
| Add HOSTNAME attribute in ``/etc/sysconfig/network`` (only for [RHEL])
@@ -60,29 +60,29 @@ Change the Management Hostname
Update Database Files
---------------------
-You need to update the new MN hostname or IP address in several database
-configuration files.
+You need to update the new MN hostname or IP address in several database configuration files.
SQLite
^^^^^^
Nothing to do.
-Postgresql
+PostgreSQL
^^^^^^^^^^
-- Edit ``/etc/xcat/cfgloc`` file, replace ``Pg:dbname=xcatdb;host=|xcatadm|xcat20``
- with ``Pg:dbname=xcatdb;host=|xcatadm|xcat20``.
+- Edit ``/etc/xcat/cfgloc`` file...
-- Edit config database config file ``/var/lib/pgsql/data/pg_hba.conf``,
- replace ``host all all /32 md5``
- with ``host all all /32 md5``.
+  Replace ``Pg:dbname=xcatdb;host=<old_MN_ip>|xcatadm|xcat20`` with ``Pg:dbname=xcatdb;host=<new_MN_ip>|xcatadm|xcat20``.
-Mysql
+- Edit the database config file ``/var/lib/pgsql/data/pg_hba.conf``...
+
+  Replace ``host all all <old_MN_ip>/32 md5`` with ``host all all <new_MN_ip>/32 md5``.
+
+MySQL
^^^^^
-Edit ``/etc/xcat/cfglooc``, replace ``mysql:dbname=xcatdb;host=|xcatadmin|xcat20``
-with ``mysql:dbname=xcatdb;host=|xcatadmin|xcat20``.
+- Edit ``/etc/xcat/cfgloc``...
+
+  Replace ``mysql:dbname=xcatdb;host=<old_MN_ip>|xcatadmin|xcat20`` with ``mysql:dbname=xcatdb;host=<new_MN_ip>|xcatadmin|xcat20``.
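+
+  This edit can also be made with a single ``sed`` substitution (a sketch; replace the placeholders with the real old and new values): ::
+
+    sed -i 's/<old_MN_ip>/<new_MN_ip>/' /etc/xcat/cfgloc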
Start the database
------------------
@@ -116,12 +116,12 @@ Change the site table master attribute
Change all IP address attribute relevant to the MN IP address
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-For example, old address was "10.6.0.1"
+For example, the old IP address was "10.6.0.1"
-* Query the attributes with old address ::
+* Query all the attributes with the old address ::
lsdef -t node -l | grep "10.6.0.1"
- # the output may looks like
+ ...
conserver=10.6.0.1
conserver=10.6.0.1
conserver=10.6.0.1
@@ -138,26 +138,23 @@ For example, old address was "10.6.0.1"
servicenode=10.6.0.1
xcatmaster=10.6.0.1
-* As the attribute with the old IP address is list above, take conserver as
- a example, query the nodes with ``conserver=10.6.0.1``.
-
- ::
+* Looking at the list above, taking ``conserver`` as an example, query the nodes with ``conserver=10.6.0.1``: ::
lsdef -t node -w conserver="10.6.0.1"
- # the output looks like
+ ...
cn1 (node)
cn2 (node)
cn3 (node)
cn4 (node)
-* Change the conserver address for cn1,cn2,cn3,cn4 ::
+* Change the conserver address for nodes ``cn1,cn2,cn3,cn4`` ::
chdef -t node cn1-cn4 conserver=
-Repeat the same process for the other attributes.
+* Repeat the same process for the other attributes containing the old IP address.
Change networks table
-^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^
Check your networks table to see if the network definitions are still correct,
if not edit accordingly ::
@@ -166,7 +163,7 @@ if not edit accordingly ::
chdef -t network
Check Result
-^^^^^^^^^^^^^
+^^^^^^^^^^^^
You can check whether all the old address has been changed using ::
@@ -174,25 +171,24 @@ You can check whether all the old address has been changed using ::
cd
fgrep "10.6.0.1" *.csv
-If the old address still exists in the ``*.csv` file, you can edit this file,
-then use the following command to restore the records ::
+If the old address still exists in the ``*.csv`` file, you can edit this file, then use the following command to restore the records ::
tabrestore
Generate SSL credentials(optional)
----------------------------------
-If you do not generate new credentials, skip this section.
-If you decide generate new credentials, then you will use the following
-command.
+Use the following command to generate new SSL credentials: ``xcatconfig -c``.
-* Generate new credentials ::
+Then update the following in xCAT:
- xcatconfig -c
+* Update the policy table with the new management node name by replacing: ::
-* Update the policy table with new MN name,
- replace ``"1.4","old_MN_name",,,,,,"trusted",,`` with
- ``"1.4","new_MN_name",,,,,,"trusted",,``
+ "1.4","old_MN_name",,,,,,"trusted",,
+
+ with: ::
+
+   "1.4","new_MN_name",,,,,,"trusted",,
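+
+  This row can be changed with ``tabedit policy``, or with ``chtab`` assuming the row keeps priority ``1.4`` (the hostname below is a placeholder): ::
+
+   chtab priority=1.4 policy.name=<new_MN_name>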
* Setup up conserver with new credentials ::
diff --git a/docs/source/advanced/cluster_maintenance/service_node/changing_hostname_ip.rst b/docs/source/advanced/cluster_maintenance/service_node/changing_hostname_ip.rst
index e99134962..f971eb7ba 100644
--- a/docs/source/advanced/cluster_maintenance/service_node/changing_hostname_ip.rst
+++ b/docs/source/advanced/cluster_maintenance/service_node/changing_hostname_ip.rst
@@ -36,11 +36,7 @@ Database Connection Changes
Granting or revoking access privilege in the database for the service node.
-* For mysql, refer to :ref:`grante_revoke_mysql_access_label`.
-.. There is no procedure in old document on sourceforge for postgress to
- grant or revoke the access privilege for service node.
-
-* For postgress, refer to `TODO `_.
+* For MySQL, refer to :ref:`grante_revoke_mysql_access_label`.
Update Provision Environment on Service Node
--------------------------------------------
diff --git a/docs/source/advanced/domain_name_resolution/domain_name_resolution.rst b/docs/source/advanced/domain_name_resolution/domain_name_resolution.rst
index d8745b6ee..984bf79fe 100644
--- a/docs/source/advanced/domain_name_resolution/domain_name_resolution.rst
+++ b/docs/source/advanced/domain_name_resolution/domain_name_resolution.rst
@@ -459,19 +459,6 @@ You can either specify the base nic* attribute name or the expanded name for a s
nicips.eth1=11.1.89.7
nichostnamesuffixes.eth1=-lab
-"otherinterfaces" vs. nic* attributes
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-For now the "otherinterfaces" attribute will still be supported but it may be dropped in future releases.Any new network interface information should be provided using the new nic attributes.
-
-If you are currently using the "otherinterfaces" node attribute you do not have to move it to the nic* attributes at this time. However, be careful to avoid any overlap or conflict with the information provided for each.
-
-If you are using "otherinterfaces" and add additional interfaces using the nic* attributes the makehosts command will add both to the /etc/hosts table.
-
-When both the "otherinterfaces" and nic attributes are used the "otherinterfaces" attribute is processed before the nic attributes.
-
-To do more nic introduce (refer to :ref:`confignics_label` ).
-
Setting addition interface information using the xCAT tabedit command
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst b/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst
index 5a80aade9..6a3e1b98a 100644
--- a/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst
+++ b/docs/source/advanced/hamn/setup_ha_mgmt_node_with_shared_data.rst
@@ -56,7 +56,7 @@ The configuration procedure will be quite different based on the shared data mec
/
-``Note``:For mysql, the database directory is ``/var/lib/mysql``; for postgresql, the database directory is ``/var/lib/pgsql``; for DB2, the database directory is specified with the site attribute databaseloc; for sqlite, the database directory is /etc/xcat, already listed above.
+``Note``: For MySQL, the database directory is ``/var/lib/mysql``; for PostgreSQL, the database directory is ``/var/lib/pgsql``; for DB2, the database directory is specified with the site attribute ``databaseloc``; for SQLite, the database directory is ``/etc/xcat``, already listed above.
Here is an example of how to make directories be shared data through NFS: ::
@@ -125,7 +125,7 @@ Setup xCAT on the Primary Management Node
service dhcpd stop
chkconfig --level 2345 dhcpd off
-#. Stop Database and prevent the database from auto starting at boot time, use mysql as an example: ::
+#. Stop Database and prevent the database from auto starting at boot time, use MySQL as an example: ::
service mysqld stop
chkconfig mysqld off
@@ -180,7 +180,7 @@ Setup xCAT on the Standby Management Node
service dhcpd stop
chkconfig --level 2345 dhcpd off
-#. Stop Database and prevent the database from auto starting at boot time. Use mysql as an example: ::
+#. Stop Database and prevent the database from auto starting at boot time. Use MySQL as an example: ::
service mysqld stop
chkconfig mysqld off
@@ -303,7 +303,7 @@ If the management node is still available and running the cluster, perform the f
#. Stop database
- Use mysql as an example: ::
+ Use MySQL as an example: ::
service mysqld stop
@@ -346,7 +346,7 @@ On the new primary management node:
mount /.xcat
mount /db2database
-#. Start database, use mysql as an example: ::
+#. Start database, use MySQL as an example: ::
service mysql start
diff --git a/docs/source/advanced/hierarchy/appendix/appendix_b_diagnostics.rst b/docs/source/advanced/hierarchy/appendix/appendix_b_diagnostics.rst
index ca259ecb3..d30fc59db 100644
--- a/docs/source/advanced/hierarchy/appendix/appendix_b_diagnostics.rst
+++ b/docs/source/advanced/hierarchy/appendix/appendix_b_diagnostics.rst
@@ -1,66 +1,18 @@
Appendix B: Diagnostics
=======================
-* **root ssh keys not setup** -- If you are prompted for a password when ssh to
- the service node, then check to see if /root/.ssh has authorized_keys. If
- the directory does not exist or no keys, on the MN, run xdsh service -K,
- to exchange the ssh keys for root. You will be prompted for the root
- password, which should be the password you set for the key=system in the
- passwd table.
-* **XCAT rpms not on SN** --On the SN, run rpm -qa | grep xCAT and make sure
- the appropriate xCAT rpms are installed on the servicenode. See the list of
- xCAT rpms in :ref:`setup_service_node_stateful_label`. If rpms
- missing check your install setup as outlined in Build the Service Node
- Stateless Image for diskless or :ref:`setup_service_node_stateful_label` for
- diskful installs.
-* **otherpkgs(including xCAT rpms) installation failed on the SN** --The OS
- repository is not created on the SN. When the "yum" command is processing
- the dependency, the rpm packages (including expect, nmap, and httpd, etc)
- required by xCATsn can't be found. In this case, check whether the
- ``/install/postscripts/repos///`` directory exists on the MN.
- If it is not on the MN, you need to re-run the "copycds" command, and there
- will be some file created under the
- ``/install/postscripts/repos//`` directory on the MN. Then, you
- need to re-install the SN, and this issue should be gone.
-* **Error finding the database/starting xcatd** -- If on the Service node when
- you run tabdump site, you get "Connection failure: IO::Socket::SSL:
- connect: Connection refused at ``/opt/xcat/lib/perl/xCAT/Client.pm``". Then
- restart the xcatd daemon and see if it passes by running the command:
- service xcatd restart. If it fails with the same error, then check to see
- if ``/etc/xcat/cfgloc`` file exists. It should exist and be the same as
- ``/etc/xcat/cfgloc`` on the MN. If it is not there, copy it from the MN to
- the SN. The run service xcatd restart. This indicates the servicenode
- postscripts did not complete successfully. Check to see your postscripts
- table was setup correctly in :ref:`add_service_node_postscripts_label` to the
- postscripts table.
-* **Error accessing database/starting xcatd credential failure**-- If you run
- tabdump site on the servicenode and you get "Connection failure:
- IO::Socket::SSL: SSL connect attempt failed because of handshake
- problemserror:14094418:SSL routines:SSL3_READ_BYTES:tlsv1 alert unknown ca
- at ``/opt/xcat/lib/perl/xCAT/Client.pm``", check ``/etc/xcat/cert``. The
- directory should contain the files ca.pem and server-cred.pem. These were
- suppose to transfer from the MN ``/etc/xcat/cert`` directory during the
- install. Also check the ``/etc/xcat/ca`` directory. This directory should
- contain most files from the ``/etc/xcat/ca`` directory on the MN. You can
- manually copy them from the MN to the SN, recursively. This indicates the
- the servicenode postscripts did not complete successfully. Check to see
- your postscripts table was setup correctly in
- :ref:`add_service_node_postscripts_label` to the postscripts table. Again
- service xcatd restart and try the tabdump site again.
-* **Missing ssh hostkeys** -- Check to see if ``/etc/xcat/hostkeys`` on the SN,
- has the same files as ``/etc/xcat/hostkeys`` on the MN. These are the ssh
- keys that will be installed on the compute nodes, so root can ssh between
- compute nodes without password prompting. If they are not there copy them
- from the MN to the SN. Again, these should have been setup by the
- servicenode postscripts.
+* **root ssh keys not setup** -- If you are prompted for a password when you ssh to the service node, check to see if the ``/root/.ssh`` directory on the MN has an ``authorized_keys`` file. If the directory does not exist or has no keys, run ``xdsh service -K`` on the MN to exchange the ssh keys for root. You will be prompted for the root password, which should be the password you set for ``key=system`` in the passwd table.
-* **Errors running hierarchical commands such as xdsh** -- xCAT has a number of
- commands that run hierarchically. That is, the commands are sent from xcatd
- on the management node to the correct service node xcatd, which in turn
- processes the command and sends the results back to xcatd on the management
- node. If a hierarchical command such as xcatd fails with something like
- "Error: Permission denied for request", check ``/var/log/messages`` on the
- management node for errors. One error might be "Request matched no policy
- rule". This may mean you will need to add policy table entries for your
- xCAT management node and service node:
+* **XCAT rpms not on SN** -- On the SN, run ``rpm -qa | grep xCAT`` and make sure the appropriate xCAT rpms are installed on the servicenode. See the list of xCAT rpms in :ref:`setup_service_node_stateful_label`. If rpms are missing, check your install setup as outlined in :ref:`setup_service_node_stateless_label` for diskless or :ref:`setup_service_node_stateful_label` for diskful installs.
+* **otherpkgs (including xCAT rpms) installation failed on the SN** -- The OS repository is not created on the SN. When the "yum" command is processing the dependency, the rpm packages (including expect, nmap, httpd, etc.) required by xCATsn can't be found. In this case, check whether the ``/install/postscripts/repos/<osver>/<arch>/`` directory exists on the MN. If it is not on the MN, you need to re-run the ``copycds`` command, and there will be files created under the ``/install/postscripts/repos/<osver>/<arch>`` directory on the MN. Then, you need to re-install the SN.
+
+* **Error finding the database/starting xcatd** -- If you run ``tabdump site`` on the Service Node and get "Connection failure: IO::Socket::SSL: connect: Connection refused at ``/opt/xcat/lib/perl/xCAT/Client.pm``", restart the xcatd daemon and see if it passes by running the command ``service xcatd restart``. If it fails with the same error, then check to see if the ``/etc/xcat/cfgloc`` file exists. It should exist and be the same as ``/etc/xcat/cfgloc`` on the MN. If it is not there, copy it from the MN to the SN, then run ``service xcatd restart``. This indicates the servicenode postscripts did not complete successfully. Run ``lsdef -i postscripts -c`` and verify the ``servicenode`` postscript appears on the list.
+
+* **Error accessing database/starting xcatd credential failure** -- If you run ``tabdump site`` on the service node and get "Connection failure: IO::Socket::SSL: SSL connect attempt failed because of handshake problems error:14094418:SSL routines:SSL3_READ_BYTES:tlsv1 alert unknown ca at ``/opt/xcat/lib/perl/xCAT/Client.pm``", check ``/etc/xcat/cert``. The directory should contain the files ``ca.pem`` and ``server-cred.pem``. These were supposed to be transferred from the MN ``/etc/xcat/cert`` directory during the install. Also check the ``/etc/xcat/ca`` directory. This directory should contain most files from the ``/etc/xcat/ca`` directory on the MN. You can manually copy them from the MN to the SN, recursively. This indicates the servicenode postscripts did not complete successfully. Run ``lsdef -i postscripts -c`` and verify the ``servicenode`` postscript appears on the list. Run ``service xcatd restart`` and try ``tabdump site`` again.
+
+* **Missing ssh hostkeys** -- Check to see if ``/etc/xcat/hostkeys`` on the SN has the same files as ``/etc/xcat/hostkeys`` on the MN. These are the ssh keys that will be installed on the compute nodes, so root can ssh between compute nodes without password prompting. If they are not there, copy them from the MN to the SN. Again, these should have been set up by the servicenode postscripts.
+
+* **Errors running hierarchical commands such as xdsh** -- xCAT has a number of commands that run hierarchically. That is, the commands are sent from xcatd on the management node to the correct service node xcatd, which in turn processes the command and sends the results back to xcatd on the management node. If a hierarchical command such as ``xdsh`` fails with something like "Error: Permission denied for request", check ``/var/log/messages`` on the management node for errors. One error might be "Request matched no policy rule". This may mean you will need to add policy table entries for your xCAT management node and service node.
+
+* **/install is not mounted on the service node from the management node** -- If the service node does not have the ``/install`` directory mounted from the management node, run ``lsdef -t site clustersite -i installloc`` and verify ``installloc="/install"``.
diff --git a/docs/source/advanced/hierarchy/define_service_nodes.rst b/docs/source/advanced/hierarchy/define_service_nodes.rst
index e031b56c9..d35427e73 100644
--- a/docs/source/advanced/hierarchy/define_service_nodes.rst
+++ b/docs/source/advanced/hierarchy/define_service_nodes.rst
@@ -28,6 +28,11 @@ The following table illustrates the cluster being used in this example:
| | r2n10 |
+----------------------+----------------------+
+
+#. Modify the ``site`` table attribute to include the **service** group's postscripts in the compute node definitions: ::
+
+ chdef -t site hierarchicalattrs="postscripts"
+
#. Select the compute nodes that will become service nodes
The first node in each rack, ``r1n01`` and ``r2n01``, is selected to become the xCAT service nodes and manage the compute nodes in that rack
@@ -53,7 +58,7 @@ The following table illustrates the cluster being used in this example:
setupconserver=1
**Tips/Hint**
- * Even if you do not want xCAT to configure any services, you must define the service nodes in the ``servicenode`` table with at least one attribute, set to 0, otherwise xCAT will not recognize the node as a service node**
+ * Even if you do not want xCAT to configure any services, you must define the service nodes in the ``servicenode`` table with at least one attribute, set to 0, otherwise xCAT will not recognize the node as a service node
* See the ``setup*`` attributes in the node definition man page for the list of available services: ``man node``
* For clusters with subnetted management networks, you might want to set ``setupupforward=1``
@@ -82,4 +87,20 @@ The following table illustrates the cluster being used in this example:
chdef -t group -o rack1 conserver=r1n01 monserver=r1n01
chdef -t group -o rack2 conserver=r2n01 monserver=r2n01
+#. Choose location of ``/install`` and ``/tftpboot`` directories (optional).
+   The ``site`` table attributes ``installloc`` and ``sharedtftp`` control the mounting of the ``/install`` and ``/tftpboot`` directories from the Management Node to the Service Nodes.
+
+ To mount ``/install`` and ``/tftpboot`` directories from Management node to each Service Node: ::
+
+ chdef -t site clustersite sharedtftp=1
+ chdef -t site clustersite installloc="/install"
+
+   To make ``/install`` and ``/tftpboot`` directories local on each Service Node, set ``site`` table attributes and "sync" ``/install`` and ``/tftpboot`` directory contents from the Management Node to the Service Nodes: ::
+
+ chdef -t site clustersite sharedtftp=0
+ chdef -t site clustersite installloc=
+ rsync -auv --exclude 'autoinst' /install r1n01:/
+ rsync -auv --exclude 'autoinst' /install r2n01:/
+ rsync -auv --exclude 'autoinst' /tftpboot r1n01:/
+ rsync -auv --exclude 'autoinst' /tftpboot r2n01:/
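+
+   To confirm which behavior is configured, the attributes can be checked afterwards: ::
+
+     lsdef -t site clustersite -i installloc,sharedtftp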
diff --git a/docs/source/advanced/hierarchy/provision/diskful_sn.rst b/docs/source/advanced/hierarchy/provision/diskful_sn.rst
index 20813fd38..972a81712 100644
--- a/docs/source/advanced/hierarchy/provision/diskful_sn.rst
+++ b/docs/source/advanced/hierarchy/provision/diskful_sn.rst
@@ -6,33 +6,51 @@ Diskful (Stateful) Installation
Any cluster using statelite compute nodes must use a stateful (diskful) Service Nodes.
-**Note: All xCAT Service Nodes must be at the exact same xCAT version as the xCAT Management Node**. Copy the files to the Management Node (MN) and untar them in the appropriate sub-directory of ``/install/post/otherpkgs``
+**Note:** All xCAT Service Nodes must be at the exact same xCAT version as the xCAT Management Node.
-**Note for the appropriate directory below, check the ``otherpkgdir=/install/post/otherpkgs/rhels7/x86_64`` attribute of the osimage defined for the servicenode.**
-
-For example, for osimage rhels7-x86_64-install-service ::
+Configure ``otherpkgdir`` and ``otherpkglist`` for service node osimage
+-----------------------------------------------------------------------
- mkdir -p /install/post/otherpkgs/**rhels7**/x86_64/xcat
- cd /install/post/otherpkgs/**rhels7**/x86_64/xcat
+ * Create a subdirectory ``xcat`` under the path specified by the ``otherpkgdir`` attribute of the service node osimage, selected during the :doc:`../define_service_nodes` step.
+
+ For example, for osimage *rhels7-x86_64-install-service* ::
+
+ [root@fs4 xcat]# lsdef -t osimage rhels7-x86_64-install-service -i otherpkgdir
+ Object name: rhels7-x86_64-install-service
+ otherpkgdir=/install/post/otherpkgs/rhels7/x86_64
+ [root@fs4 xcat]# mkdir -p /install/post/otherpkgs/rhels7/x86_64/xcat
+
+ * Download or copy ``xcat-core`` and ``xcat-dep`` .bz2 files into that ``xcat`` directory ::
+
+ wget https://xcat.org/files/xcat/xcat-core/_Linux/xcat-core/xcat-core--linux.tar.bz2
+ wget https://xcat.org/files/xcat/xcat-dep/_Linux/xcat-dep--linux.tar.bz2
+
+ * Untar the ``xcat-core`` and ``xcat-dep`` .bz2 files ::
+
+  cd /install/post/otherpkgs/<os>/<arch>/xcat
tar jxvf core-rpms-snap.tar.bz2
tar jxvf xcat-dep-*.tar.bz2
-Next, add rpm names into your own version of service...otherpkgs.pkglist file. In most cases, you can find an initial copy of this file under ``/opt/xcat/share/xcat/install/`` . Or copy one from another similar platform. ::
+ * Verify the following entries are included in the package file specified by the ``otherpkglist`` attribute of the service node osimage. ::
- mkdir -p /install/custom/install/rh
- cp /opt/xcat/share/xcat/install/rh/service.rhels7.x86_64.otherpkgs.pkglist \
- /install/custom/install/rh
- vi /install/custom/install/rh/service.rhels7.x86_64.otherpkgs.pkglist
+   xcat/xcat-dep/<os>/<arch>/xCATsn
+   xcat/xcat-dep/<os>/<arch>/conserver-xcat
+   xcat/xcat-dep/<os>/<arch>/perl-Net-Telnet
+   xcat/xcat-dep/<os>/<arch>/perl-Expect
-Make sure the following entries are included in the
-/install/custom/install/rh/service.rhels7.x86_64.otherpkgs.pkglist: ::
+ For example, for the osimage *rhels7-x86_64-install-service* ::
- xCATsn
- conserver-xcat
- perl-Net-Telnet
- perl-Expect
+ [root@fs4 ~]# lsdef -t osimage rhels7-x86_64-install-service -i otherpkglist
+ Object name: rhels7-x86_64-install-service
+ otherpkglist=/opt/xcat/share/xcat/install/rh/service.rhels7.x86_64.otherpkgs.pkglist
+ [root@fs4 ~]# cat /opt/xcat/share/xcat/install/rh/service.rhels7.x86_64.otherpkgs.pkglist
+ xcat/xcat-core/xCATsn
+ xcat/xcat-dep/rh7/x86_64/conserver-xcat
+ xcat/xcat-dep/rh7/x86_64/perl-Net-Telnet
+ xcat/xcat-dep/rh7/x86_64/perl-Expect
+ [root@fs4 ~]#
-**Note: you will be installing the xCAT Service Node rpm xCATsn meta-package on the Service Node, not the xCAT Management Node meta-package. Do not install both.**
+**Note:** You will be installing the xCAT Service Node rpm xCATsn meta-package on the Service Node, not the xCAT Management Node meta-package. Do not install both.
Update the rhels6 RPM repository (rhels6 only)
----------------------------------------------
@@ -69,26 +87,18 @@ Update the rhels6 RPM repository (rhels6 only)
createrepo \
-g repodata /98462d05248098ef1724eddb2c0a127954aade64d4bb7d4e693cff32ab1e463c-comps-rhel6-Server.xml
-**Note:** you should use comps-rhel6-Server.xml with its key as the group file.
+**Note:** you should use ``comps-rhel6-Server.xml`` with its key as the group file.
-Set the node status to ready for installation
----------------------------------------------
-
-Run nodeset to the osimage name defined in the provmethod attribute on your Service Node. ::
-
- nodeset service osimage=""
-
-For example ::
-
- nodeset osimage="rhels7-x86_64-install-service"
-
-Initialize network boot to install Service Nodes
-------------------------------------------------
+Install Service Nodes
+---------------------
::
- rsetboot net
- rpower boot
+ rinstall osimage=""
+
+For example ::
+
+ rinstall osimage="rhels7-x86_64-install-service"
Monitor the Installation
------------------------
diff --git a/docs/source/advanced/hierarchy/provision/diskless_sn.rst b/docs/source/advanced/hierarchy/provision/diskless_sn.rst
index 4f521aa48..717d3ee18 100644
--- a/docs/source/advanced/hierarchy/provision/diskless_sn.rst
+++ b/docs/source/advanced/hierarchy/provision/diskless_sn.rst
@@ -1,3 +1,5 @@
+.. _setup_service_node_stateless_label:
+
Diskless (Stateless) Installation
=================================
diff --git a/docs/source/advanced/hierarchy/provision/verify_sn.rst b/docs/source/advanced/hierarchy/provision/verify_sn.rst
index 0b111fab2..3c6210ef1 100644
--- a/docs/source/advanced/hierarchy/provision/verify_sn.rst
+++ b/docs/source/advanced/hierarchy/provision/verify_sn.rst
@@ -2,10 +2,10 @@ Verify Service Node Installation
================================
* ssh to the service nodes. You should not be prompted for a password.
-* Check to see that the xcat daemon xcatd is running.
-* Run some database command on the service node, e.g tabdump site, or nodels,
- and see that the database can be accessed from the service node.
-* Check that ``/install`` and ``/tftpboot`` are mounted on the service node
- from the Management Node, if appropriate.
-* Make sure that the Service Node has Name resolution for all nodes, it will
- service.
+* Check to see that the xcat daemon ``xcatd`` is running.
+* Run some database command on the service node, e.g ``tabdump site``, or ``nodels``, and see that the database can be accessed from the service node.
+* Check that ``/install`` and ``/tftpboot`` are mounted on the service node from the Management Node, if appropriate (a quick check is shown below).
+* Make sure that the Service Node has name resolution for all nodes it will service.
+* Run ``updatenode -V -s`` on the management node and verify the output contains ``Running command on ``, which indicates the command from the management node was sent to the service node to run against the compute node target.
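+* A quick spot check of the mounts from the service node (skip if ``installloc``/``sharedtftp`` are configured for local directories): ::
+
+    df -h | grep -E '/install|/tftpboot'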
+
+See :doc:`Appendix B <../appendix/appendix_b_diagnostics>` for possible solutions.
diff --git a/docs/source/advanced/index.rst b/docs/source/advanced/index.rst
index 7193f3b71..3a78e0ac9 100755
--- a/docs/source/advanced/index.rst
+++ b/docs/source/advanced/index.rst
@@ -15,6 +15,7 @@ Advanced Topics
kit/index.rst
mixed_cluster/index.rst
networks/index.rst
+ pdu/index.rst
ports/xcat_ports.rst
probe/index.rst
raid/index.rst
diff --git a/docs/source/advanced/networks/edgecore_switches/index.rst b/docs/source/advanced/networks/edgecore_switches/index.rst
deleted file mode 100644
index 0c897d340..000000000
--- a/docs/source/advanced/networks/edgecore_switches/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-ONIE Compatible Bare Metal Switches
-===================================
-
-.. toctree::
- :maxdepth: 2
-
- onie_switches.rst
diff --git a/docs/source/advanced/networks/edgecore_switches/onie_switches.rst b/docs/source/advanced/networks/edgecore_switches/onie_switches.rst
deleted file mode 100644
index ba079c23c..000000000
--- a/docs/source/advanced/networks/edgecore_switches/onie_switches.rst
+++ /dev/null
@@ -1,135 +0,0 @@
-ONIE compatible bare metal switch
-=================================
-
-The ONIE [1]_. compatible bare metal switches(abbreviated as "ONIE switch") from vendors such as Mellanox or Edgecore are often used as top-of-rack switches in the cluster. Usually, the switches are shipped with a Cumulus Network OS(https://cumulusnetworks.com) and a license pre-installed. In some cases, user may get whitebox switch hardware with a standalone Cumulus installer and license file. This documentation presents a typical workflow on how to setup ONIE switch from white box, then configure and manage the switch with xCAT.
-
-.. [1] Open Network Install Environment: Created by Cumulus Networks, Inc. in 2012, the Open Network Install Environment (ONIE) Project is a small operating system, pre-installed as firmware on bare metal network switches, that provides an environment for automated operating system provisioning.
-
-Create an ONIE switch object
--------------------------------
-
-The ONIE switch object can be created with the "onieswitch" template shipped in xCAT, the ip address and mac of the switch management ethernet port should be specified : ::
-
- mkdef edgecoresw1 --template onieswitch arch=armv71 ip=192.168.5.191 mac=8C:EA:1B:12:CA:40
-
-Provision the Cumulus OS on ONIE switch
----------------------------------------
-
-To provision Cumulus OS, the Cumulus installation file, a binary shipped with the switch, should be saved in a directory exported in the http server.
-
-Run ``chdef`` to specify the "provmethod" attribute of the switch object to the full path of the installation file: ::
-
- chdef edgecoresw1 netboot=onie provmethod="/install/custom/sw/edgecore/cumulus-linux-3.1.0-bcm-armel-1471981017.dc7e2adzfb43f6b.bin"
-
-Run ``makedhcp`` to prepare the DHCP/BOOTP lease. ::
-
- makedhcp -a edgecoresw1
-
-The command or operation to start the provision dependes on the status of switch:
-
-1. If the switch is a white box without Cumulus OS installed, simply connect the management ethernet port of the switch to xCAT management node, then power on the switch.
-
-2. If a Cumulus OS has been installed on the switch, you need to login to the switch(the default user is ``cumulus`` and the password is ``CumulusLinux!``) and run a batch of commands: ::
-
- sudo onie-select -i
- sudo reboot
-
-If the passwordless-ssh of "root" has been enabled, the commands can be issued with: ::
-
- xdsh edgecoresw1 "/usr/cumulus/bin/onie-select -i -f;reboot"
-
-After reboot, the switch will enter ONIE install mode and begin the installation. The provision might take about 50 minutes.
-
-
-Switch Configuration
---------------------
-
-Enable the passwordless ssh for "root"
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In a newly installed Cumulus OS, a default user ``cumulus`` will be created, the switch can be accessed via ssh with the default password ``CumulusLinux!``.
-
-The passwordless ssh access of "root" should be enabled with the script ``/opt/xcat/share/xcat/scripts/configonie`` ::
-
- /opt/xcat/share/xcat/scripts/configonie --switches edgecoresw1 --ssh
-
-After the passwordless access for "root" is setup successfully, the switch can be managed with the node management commands such as ``xdsh``, ``xdcp`` and ``updatenode``, etc.
-
-Licence file installation
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-On the newly installed switch, only the serial console and the management ethernet port are enabled. To activate the data ports, the licence file shipped with the switch should be installed: ::
-
- xdcp edgecoresw1 /install/custom/sw/edgecore/licensefile.txt /tmp
- xdsh edgecoresw1 "/usr/cumulus/bin/cl-license -i /tmp/licensefile.txt"
-
-To check whether the license file is installed successfully: ::
-
- ~: xdsh edgecoresw1 /usr/cumulus/bin/cl-license
- edgecoresw1: xxx@xx.com|xxxxxxxxxxxxxxx
-
-Reboot the switch to apply the licence file: ::
-
- xdsh edgecoresw1 reboot
-
-Enable SNMP
-~~~~~~~~~~~
-
-The snmpd in the switch is not enabled by default, xCAT ships a postscript to enable it: ::
-
- updatenode edgecoresw1 -P enablesnmp
-
-
-Switch Discovery
-----------------
-
-The ONIE switch can be scaned and discovered with ``switchdiscover`` ::
-
- ~: switchdiscover --range 192.168.23.1-10
- Discovering switches using nmap for 192.168.23.1-10. It may take long time...
- ip name vendor mac
- ------------ ------------ ------------ ------------
- 192.168.23.1 edgecoresw1 Edgecore switch 8C:EA:1B:12:CA:40
- Switch discovered: edgecoresw1
-
-Once SNMP on the ONIE switch is enabled, the ONIE switch can be discovered with "snmp" method: ::
-
- ~: switchdiscover --range 192.168.23.1-10 -s snmp
- Discovering switches using snmpwalk for 192.168.23.1-10 ....
- ip name vendor mac
- ------------ ------------ ------------ ------------
- 192.168.23.1 edgecoresw1 Linux edgecoresw1 4.1.0-cl-2-iproc #1 SMP Debian 4.1.25-1+cl3u4 (2016-08-13) armv7l 8c:ea:1b:12:ca:40
- Switch discovered: edgecoresw1
-
-
-Switch Management
------------------
-
-File Dispatch
-~~~~~~~~~~~~~
-
-The files can be dispatched to ONIE switches with ``xdcp`` ::
-
- xdcp edgecoresw1
-
-Refer to :doc:`xdcp manpage ` for details.
-
-Remote Commands
-~~~~~~~~~~~~~~~
-
-Commands can be run on ONIE switches remotely with ``xdsh`` ::
-
- xdsh edgecoresw1
-
-Refer to :doc:`xdsh manpage ` for details.
-
-Run scripts remotely
-~~~~~~~~~~~~~~~~~~~~
-
-The scripts under "/install/postscripts" can be run on ONIE switches with ``updatenode -P`` ::
-
- updatenode edgecoresw1 -P