Modified check_modules.sh script running the mpi_test_suite for all Open MPI installations.
This commit is contained in:
parent
fbddead2c0
commit
a386417f32
5 changed files with 156 additions and 8 deletions
133
check_mpi.sh
Executable file
133
check_mpi.sh
Executable file
|
@ -0,0 +1,133 @@
|
||||||
|
#!/bin/bash -l
#
# Christoph Niethammer <niethammer@hlrs.de>, (c) 2012
#
# Script checking the MPI modules of the module environment: every module
# reported by 'module avail' for the selected module path is loaded, the
# mpi_test_suite is built and run with it, and the module is unloaded again.
# Modules that fail to load, fail the test suite or fail to unload are listed
# in a summary at the end.
#
# Usage:
#   check_mpi.sh [MODULE]
#
# Options:
#   MODULE  May be any valid module path as used for 'module avail [MODULE]'
#           (default: mpi/openmpi)
#
# Environment:
#   LOGDIR  Directory receiving check_modules.log (default: current directory)
#
# NOTE(review): the '-l' in the shebang starts bash as a login shell,
# presumably so that the profile scripts defining 'module' are read - confirm.
#
# Strict mode (set -e / -u) is deliberately not enabled: failing 'module'
# commands are the very thing this script detects and reports, and re-sourcing
# the saved environment below is expected to emit (suppressed) errors for
# read-only shell variables.

# Command line options:
moduleclass=${1:-mpi/openmpi}   # check only subset of modules
mpi_test_suite_dir=$HOME/mpi_test_suite.latest

# ANSI escape sequences for colored terminal output
Color_Off='\e[0m'       # Text Reset
IGreen='\e[0;92m'       # Intense Green
IRed='\e[0;91m'         # Intense Red

# Without a 'module' command there is nothing to check; bail out instead of
# iterating over the words of a "command not found" message.
if ! type -t module >/dev/null 2>&1; then
  printf '%s\n' "ERROR: 'module' command not available in this shell" >&2
  exit 1
fi

# intermediate files, logfiles
: "${LOGDIR:=$PWD}"
mkdir -p -- "$LOGDIR"
# private, unpredictable scratch directory; removed on every exit path
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/check_modules-${USER}.XXXXXX") || exit 1
trap 'rm -rf -- "${tmpdir:?}"' EXIT
logfile="$LOGDIR/check_modules.log"                 # logfile with detailed information
module_load_logfile="$tmpdir/.module_load.log"      # output of 'module load' commands
module_rm_logfile="$tmpdir/.module_rm.log"          # output of 'module rm' commands
module_clean_env_file="$tmpdir/.module_clean_env"   # original environment

# Number of processors listed in /proc/cpuinfo, used for 'make -j' and
# 'mpirun -np'. Computed once; falls back to 1 if nothing usable is found.
nprocs=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
[[ "$nprocs" =~ ^[1-9][0-9]*$ ]] || nprocs=1

# Append a line to the logfile only.
log() {
  printf '%s\n' "$*" >> "$logfile"
}

# Print a line to the terminal and append it to the logfile.
say() {
  printf '%s\n' "$*" | tee -a "$logfile"
}

# Succeeds if module $1 appears in 'module li -t'.
# Alias decorations such as "(default)" and trailing blanks are stripped and
# the name is compared as an exact fixed string, so that e.g. "foo/1.6" is not
# considered loaded just because "foo/1.6.1" is.
module_is_loaded() {
  module li -t 2>&1 \
    | sed -e 's/(.*)//' -e 's/[[:space:]]*$//' \
    | grep -qxF -- "$1"
}

# Remember module $1 as failed; each module is recorded only once.
record_failure() {
  local name=$1 known
  for known in "${failed_modules[@]}"; do
    [[ "$known" == "$name" ]] && return 0
  done
  failed_modules+=("$name")
}

# Build and run the mpi_test_suite with the currently loaded module.
# $1 is the name of the per-module log written inside the test suite directory.
# Succeeds only if that log reports zero failed tests.
# Runs in a subshell so the working directory of the script is untouched;
# errors from 'cd' or 'grep' themselves end up in the main logfile.
run_test_suite() {
  local suite_log=$1
  (
    cd -- "$mpi_test_suite_dir" || exit 1
    make clean >"$suite_log" 2>&1
    make -j "$nprocs" >>"$suite_log" 2>&1
    mpirun -np "$nprocs" mpi_test_suite -t 'All,^io,^one-sided' >>"$suite_log" 2>&1
    grep -q "Number of failed tests:0" "$suite_log"
  ) 2>>"$logfile"
}

# Save the original environment.
# This snapshot is taken BEFORE any per-run state (failed_modules, the module
# list) is created, because it is re-sourced after every module and would
# otherwise reset that state.
set > "$module_clean_env_file"

# list of all failed modules
failed_modules=()

: > "$logfile"
say "--------------------"
say "Module environment check"
say "--------------------"
say "Date: $(date)"
say "Host: $(/bin/hostname)"
say "USER: $USER"
say "Logfile: $logfile"
say "--------------------"
{
  echo "Environment:"
  cat "$module_clean_env_file"
  echo "--------------------"
} >> "$logfile"

# Collect the whitespace separated words of 'module av -t' into an array
# (no pathname expansion takes place). 'read' hits end-of-input without seeing
# a NUL delimiter and therefore returns non-zero even on success.
candidates=()
read -r -d '' -a candidates < <(module av -t "$moduleclass" 2>&1) || true

for m_original in "${candidates[@]}"; do
  # skip any non module line in output
  [[ $m_original =~ ^[A-Za-z] ]] || continue

  # Remove aliases e.g. (default)
  m=$(printf '%s\n' "$m_original" | sed -e 's/(.*)//')
  printf 'Checking %s ... ' "$m_original"
  log "Checking $m_original ... "

  log "module load $m"
  module load "$m" > "$module_load_logfile" 2>&1
  cat "$module_load_logfile" >> "$logfile"
  module li >> "$logfile" 2>&1

  # check if module was loaded and did not report errors during loading
  if module_is_loaded "$m" && ! grep -q ERROR "$module_load_logfile"; then
    # per-module test suite log: module name with '/' replaced by '_'
    if run_test_suite "${m//\//_}.log"; then
      printf '%bpassed test suite%b ... ' "$IGreen" "$Color_Off"
      log "passed test suite"
    else
      printf '%bTestsuite failed%b ' "$IRed" "$Color_Off"
      log "ERROR: Testsuite failed for module '$m'"
      record_failure "$m_original"
    fi

    log "module rm $m"
    module rm "$m" > "$module_rm_logfile" 2>&1
    cat "$module_rm_logfile" >> "$logfile"
    module li >> "$logfile" 2>&1

    # check if module was unloaded
    if module_is_loaded "$m"; then
      printf '%bunloading failed%b\n' "$IRed" "$Color_Off"
      log "ERROR: unloading module '$m' failed"
      record_failure "$m_original"
    else
      printf '%bsuccess%b\n' "$IGreen" "$Color_Off"
      log "SUCCESS"
    fi
  else
    printf '%bloading failed%b\n' "$IRed" "$Color_Off"
    log "ERROR: loading module '$m' failed"
    record_failure "$m_original"
  fi

  # clean up module environment
  log "module purge"
  module purge >> "$logfile" 2>&1
  module li >> "$logfile" 2>&1
  log "Resetting environment ..."

  # Reset the complete environment manually to overcome problems with
  # inconsistent internal caches of the module command after module purge.
  # Errors (e.g. assignments to read-only shell variables) are expected here
  # and intentionally discarded.
  # shellcheck disable=SC1090
  source "$module_clean_env_file" 2>/dev/null

  # clean up intermediate files
  rm -f -- "$module_load_logfile" "$module_rm_logfile"

  printf '\n' >> "$logfile"
done

# The file storing the initial environment lives in $tmpdir and is removed
# together with it by the EXIT trap.

echo "----------------------------------------"
echo "Summary of failed modules (${#failed_modules[@]}):"
echo "----------------------------------------"
for m in "${failed_modules[@]}"; do
  echo "$m"
done
echo "----------------------------------------"
|
|
@ -24,6 +24,7 @@ parser.add_option("--logdir", metavar="DIR", dest="logdir")
|
||||||
parser.add_option("--startdate", metavar="YYYY-MM-DD", dest="startdate")
|
parser.add_option("--startdate", metavar="YYYY-MM-DD", dest="startdate")
|
||||||
parser.add_option("--enddate", metavar="YYYY-MM-DD", dest="enddate")
|
parser.add_option("--enddate", metavar="YYYY-MM-DD", dest="enddate")
|
||||||
parser.add_option("--nohpc", action="store_true", default=False, dest="nohpc", help="Exclude hpc* accounts from stats")
|
parser.add_option("--nohpc", action="store_true", default=False, dest="nohpc", help="Exclude hpc* accounts from stats")
|
||||||
|
parser.add_option("--printusers", action="store_true", default=False, dest="printusers", help="Print detailed user lists for module usage.")
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
if options.logdir :
|
if options.logdir :
|
||||||
logdir = options.logdir
|
logdir = options.logdir
|
||||||
|
@ -76,8 +77,10 @@ print "{0:60s}{1:>8s} {2:>8s}".format('module name', '# uses', '# users')
|
||||||
print "-"*78
|
print "-"*78
|
||||||
for (v,m) in sorted( ((v,k) for k,v in modulestats.iteritems()), reverse=True) :
|
for (v,m) in sorted( ((v,k) for k,v in modulestats.iteritems()), reverse=True) :
|
||||||
print "{0:60s}{1:>8d} {2:>8d}".format(m, v, len(moduleusers[m].keys()))
|
print "{0:60s}{1:>8d} {2:>8d}".format(m, v, len(moduleusers[m].keys()))
|
||||||
#print moduleusers[m]
|
if options.printusers :
|
||||||
|
print moduleusers[m]
|
||||||
print "-"*78
|
print "-"*78
|
||||||
print "{0:60s}{1:>8d} {2:>8d}".format('total', total_modules, len(total_users))
|
print "{0:60s}{1:>8d} {2:>8d}".format('total', total_modules, len(total_users))
|
||||||
#print sorted([ (u) for u in total_users ])
|
if options.printusers :
|
||||||
|
print sorted([ (u) for u in total_users ])
|
||||||
print "-"*78
|
print "-"*78
|
||||||
|
|
|
@ -34,7 +34,10 @@ src_unpack() {
|
||||||
}
|
}
|
||||||
|
|
||||||
src_pretest() {
|
src_pretest() {
|
||||||
make check
|
tar xfz ${SRC_POOL}/dejagnu-1.5.tar.gz
|
||||||
|
DEJAGNUDIR=$PWD/dejagnu-1.5
|
||||||
|
cd objdir
|
||||||
|
DEJAGNULIBS=$DEJAGNUDIR/lib make -k check
|
||||||
}
|
}
|
||||||
|
|
||||||
# Other interesting configure options:
|
# Other interesting configure options:
|
||||||
|
|
|
@ -24,7 +24,11 @@ enable_cuda=${enable_cuda:=0}
|
||||||
|
|
||||||
if [[ $PLATFORM == "hermit1" ]] ; then
|
if [[ $PLATFORM == "hermit1" ]] ; then
|
||||||
CONFIGURE_OPTS=" \
|
CONFIGURE_OPTS=" \
|
||||||
|
--build=x86_64-unknown-linux-gnu \
|
||||||
|
--host=x86_64-cray-linux-gnu \
|
||||||
|
--without-cross-prefix \
|
||||||
--with-platform=crayxe \
|
--with-platform=crayxe \
|
||||||
|
--disable-exectrace \
|
||||||
"
|
"
|
||||||
else
|
else
|
||||||
CONFIGURE_OPTS=" \
|
CONFIGURE_OPTS=" \
|
||||||
|
|
|
@ -49,14 +49,19 @@ fi
|
||||||
|
|
||||||
# use CUDA
|
# use CUDA
|
||||||
if [ $enable_cuda != 0 ] ; then
|
if [ $enable_cuda != 0 ] ; then
|
||||||
|
if [[ $PLATFORM == "hermit1" ]] ; then
|
||||||
|
module load craype-accel-nvidia35
|
||||||
|
else
|
||||||
module load cuda
|
module load cuda
|
||||||
|
fi
|
||||||
|
#--with-cuda-inc-dir=$CUDA_DIR/include \
|
||||||
|
#--with-cuda-lib-dir=$CUDA_DIR/lib \
|
||||||
|
#--with-cudart-lib-dir=$CUDA_DIR/lib64 \
|
||||||
|
#--with-cudart-dir=$CUDA_DIR \
|
||||||
CUDA_DIR=${CUDA_DIR:=$(dirname $(dirname $(which nvcc)))}
|
CUDA_DIR=${CUDA_DIR:=$(dirname $(dirname $(which nvcc)))}
|
||||||
CONFIGURE_OPTS="${CONFIGURE_OPTS} \
|
CONFIGURE_OPTS="${CONFIGURE_OPTS} \
|
||||||
--with-cuda-dir=$CUDA_DIR \
|
--with-cuda-dir=$CUDA_DIR \
|
||||||
--with-cuda-inc-dir=$CUDA_DIR/include \
|
-with-cupti-dir=$CUDA_DIR/extras/CUPTI
|
||||||
--with-cuda-lib-dir=$CUDA_DIR/lib \
|
|
||||||
--with-cudart-dir=$CUDA_DIR \
|
|
||||||
--with-cudart-lib-dir=$CUDA_DIR/lib64 \
|
|
||||||
"
|
"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue