commit c6a691209472af7c3161dae3c65749b6f9705e39 Author: Arif Ali Date: Wed Jun 10 11:53:11 2020 +0000 Initial commit Added initial scripts and configs to get HPL compiled on raspberry pi4 and get reasonable performance diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae8bd80 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +This is a collection of scripts and configuration files that +allow you to compile and run the benchmark in a consistent way. + +The following commands clone the repository and compile everything: + +```bash +git clone https://gitlab.arif-ali.co.uk:8543/arif/raspberrypi-hpl +cd raspberrypi-hpl/scripts +./compile_all.sh +``` + +Then run the benchmark with the following script: + +```bash +./run_job.sh +``` diff --git a/configs/HPL.dat b/configs/HPL.dat new file mode 100644 index 0000000..5dcdf84 --- /dev/null +++ b/configs/HPL.dat @@ -0,0 +1,36 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +2 # of problems sizes (N) +17280 17280 Ns +5 # of NBs +192 192 192 192 192 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +1 # of process grids (P x Q) +1 1 1 1 1 Ps +1 1 1 1 1 Qs +16.0 threshold +1 # of panel fact +2 PFACTs (0=left, 1=Crout, 2=Right) +1 # of recursive stopping criterium +4 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. +1 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 DEPTHs (>=0) +2 SWAP (0=bin-exch,1=long,2=mix) +64 swapping threshold +0 L1 in (0=transposed,1=no-transposed) form +0 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) +##### This line (no. 32) is ignored (it serves as a separator). 
###### +0 Number of additional problem sizes for PTRANS +1200 10000 30000 values of N +0 number of additional blocking sizes for PTRANS +40 9 8 13 13 20 16 32 64 values of NB diff --git a/configs/Make.rpi4-mpich b/configs/Make.rpi4-mpich new file mode 100644 index 0000000..0cb9026 --- /dev/null +++ b/configs/Make.rpi4-mpich @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = rpi4-mpich +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/rpi-hpl-workdir/hpl-2.3 +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. 
The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /opt/mpich/3.3.2 +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = /opt/OpenBLAS +LAinc = $(LAdir)/include +LAlib = $(LAdir)/lib/libopenblas.a -lpthread +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lrt -lbacktrace +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -mtune=cortex-a72 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/scripts/CONFIG b/scripts/CONFIG new file mode 100644 index 0000000..303219a --- /dev/null +++ b/scripts/CONFIG @@ -0,0 +1,8 @@ +export DOWNLOADS=~/Downloads +export WORKDIR=~/rpi-hpl-workdir +export SCRIPTSDIR=${PWD} +export SERVICES="snap.lxd.daemon snap.lxd.daemon.unix.socket postfix systemd-timesyncd wpa_supplicant snapd snapd.apparmor.service systemd-resolved snapd.service snapd.socket" + +# Flags based on https://en.wikichip.org/wiki/arm_holdings/microarchitectures/cortex-a72 +export COMMON_FLAGS="-mtune=cortex-a72" + diff --git a/scripts/compile_all.sh b/scripts/compile_all.sh new file mode 100755 index 0000000..a3e6109 --- /dev/null +++ b/scripts/compile_all.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +. 
CONFIG
+
+mkdir -p "${DOWNLOADS}"
+mkdir -p "${WORKDIR}"
+
+./install_deps.sh || exit 1
+./make_mpich.sh || exit 1
+./make_openblas.sh || exit 1
+./make_hpl.sh
diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh
new file mode 100755
index 0000000..6905eee
--- /dev/null
+++ b/scripts/install_deps.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Install the compilers and tools needed to build MPICH, OpenBLAS and HPL.
+
+sudo apt update
+sudo apt -y upgrade
+
+# Space separated list, expanded unquoted below so each package is its own word.
+dep_pkgs="gcc g++ gfortran make wget"
+
+sudo apt -y install ${dep_pkgs}
+
diff --git a/scripts/make_hpl.sh b/scripts/make_hpl.sh
new file mode 100755
index 0000000..8480743
--- /dev/null
+++ b/scripts/make_hpl.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Fetch HPL, unpack it in WORKDIR and build it with configs/Make.rpi4-mpich.
+# Run from the scripts directory: CONFIG is sourced by relative name.
+
+. CONFIG || exit 1
+
+version=2.3
+package=hpl
+
+rm -rf "${package}-${version}"
+rm -rf "${WORKDIR:?}/${package}-${version}"
+wget "https://www.netlib.org/benchmark/hpl/${package}-${version}.tar.gz" -O "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+
+cd "${WORKDIR}" || exit 1
+tar xfz "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+cd "${package}-${version}" || exit 1
+
+cp "${SCRIPTSDIR}/../configs/Make.rpi4-mpich" . || exit 1
+# clean_arch is the HPL make target that clears an earlier build for this arch.
+make arch=rpi4-mpich -j 3 clean_arch
+make arch=rpi4-mpich -j 3 install
diff --git a/scripts/make_mpich.sh b/scripts/make_mpich.sh
new file mode 100755
index 0000000..5898617
--- /dev/null
+++ b/scripts/make_mpich.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Fetch MPICH, build it in WORKDIR and install it under /opt/mpich.
+
+# Load the shared settings so the script also works when started directly.
+. CONFIG || exit 1
+
+version=3.3.2
+package=mpich
+
+rm -rf "${package}-${version}"
+rm -rf "${WORKDIR:?}/${package}-${version}"
+wget "http://www.mpich.org/static/downloads/${version}/${package}-${version}.tar.gz" -O "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+
+cd "${WORKDIR}" || exit 1
+tar xfz "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+cd "${package}-${version}" || exit 1
+
+export CFLAGS="${COMMON_FLAGS}"
+export FCFLAGS="${COMMON_FLAGS}"
+export LDFLAGS="${COMMON_FLAGS}"
+
+./configure --prefix="/opt/${package}/${version}" || exit 1
+make -j 3 || exit 1
+sudo make -j 3 install
diff --git a/scripts/make_openblas.sh b/scripts/make_openblas.sh
new file mode 100755
index 0000000..9a447b0
--- /dev/null
+++ b/scripts/make_openblas.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+. 
CONFIG
+
+version=develop
+package=OpenBLAS
+
+rm -rf "${package}-${version}"
+rm -rf "${WORKDIR:?}/${package}-${version}"
+wget "https://github.com/xianyi/OpenBLAS/archive/${version}.tar.gz" -O "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+
+cd "${WORKDIR}" || exit 1
+tar xfz "${DOWNLOADS}/${package}-${version}.tar.gz" || exit 1
+cd "${package}-${version}" || exit 1
+
+make -j 3 || exit 1
+sudo make -j 3 install
diff --git a/scripts/run_job.sh b/scripts/run_job.sh
new file mode 100755
index 0000000..355a049
--- /dev/null
+++ b/scripts/run_job.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Stop the background services listed in CONFIG, then start xhpl from the
+# configs directory, where HPL.dat is kept.
+
+. CONFIG || exit 1
+
+# SERVICES is deliberately unquoted so each unit name becomes its own argument.
+sudo systemctl stop ${SERVICES}
+
+cd "${SCRIPTSDIR}/../configs" || exit 1
+"${WORKDIR}/hpl-2.3/bin/rpi4-mpich/xhpl"