From a386417f32346adf05f9001dcde2f11088456528 Mon Sep 17 00:00:00 2001
From: Christoph Niethammer
Date: Thu, 12 Dec 2013 11:06:49 +0000
Subject: [PATCH] Modified check_modules.sh script running the mpi_test_suite for all Open MPI installations.

---
 check_mpi.sh                                       | 133 ++++++++++++++++++
 module_stats.py                                    |   7 +-
 packages/compiler/gcc/gcc-4.7.2                    |   5 +-
 .../vampirtrace/vampirtrace-5.14.1                 |   4 +
 .../vampirtrace/vampirtrace-5.14.3-gpu1            |  15 +-
 5 files changed, 156 insertions(+), 8 deletions(-)
 create mode 100755 check_mpi.sh

diff --git a/check_mpi.sh b/check_mpi.sh
new file mode 100755
index 0000000..81fe44b
--- /dev/null
+++ b/check_mpi.sh
@@ -0,0 +1,133 @@
+#!/bin/bash -l
+#
+# Christoph Niethammer , (c) 2012
+#
+# Script checking the Open MPI modules: each module is loaded, the mpi_test_suite is built and run with it, and the module is unloaded again.
+#
+# Usage:
+#   check_mpi.sh [MODULE]
+#
+# Options:
+#   MODULE   May be any valid module path as used for 'module avail [MODULE]' (default: mpi/openmpi)
+#
+
+# Command line options:
+moduleclass=${1:-mpi/openmpi}   # check only subset of modules
+mpi_test_suite_dir=$HOME/mpi_test_suite.latest
+
+# definitions of escape sequences for color output to display
+Color_Off='\e[0m'       # Text Reset
+IGreen='\e[0;92m'       # Intense Green
+IRed='\e[0;91m'         # Intense Red
+
+# intermediate files, logfiles
+LOGDIR=${LOGDIR:=$PWD}
+tmpdir=$(mktemp -d "/tmp/check_modules-$USER.XXXXXX") || exit 1   # private directory: a file in it is sourced later
+trap 'rm -rf "$tmpdir"' EXIT                                      # remove it even if the script is aborted
+logfile="$LOGDIR/check_modules.log"                 # logfile with detailed information
+module_load_logfile="$tmpdir/.module_load.log"      # output of 'module load' commands
+module_rm_logfile="$tmpdir/.module_rm.log"          # output of 'module rm' commands
+module_clean_env_file="$tmpdir/.module_clean_env"   # original environment
+
+# save the original environment
+set > "$module_clean_env_file"
+
+# list of all failed modules
+failed_modules=()
+
+
+
+echo "--------------------" | tee "$logfile"
+echo "Module environment check" | tee -a "$logfile"
+echo "--------------------" | tee -a "$logfile"
+echo "Date: $(date)" | tee -a "$logfile"
+echo "Host: $(/bin/hostname)" | tee -a "$logfile"
+echo "USER: $USER" | tee -a "$logfile"
+echo "Logfile: $logfile" | tee -a "$logfile"
+echo "--------------------" | tee -a "$logfile"
+echo "Environment:" >> "$logfile"
+cat "$module_clean_env_file" >> "$logfile"
+echo "--------------------" >> "$logfile"
+
+for m_original in $(module av -t "$moduleclass" 2>&1); do
+    if [[ $m_original =~ ^[A-Za-z] ]]; then # skip any non module line in output
+        m=$(echo "$m_original" | sed -e 's/(.*)//') # Remove aliases e.g. (default)
+        echo -n "Checking $m_original ... "
+        echo "Checking $m_original ... " >> "$logfile"
+
+        cmd="module load $m"
+        echo "$cmd" >> "$logfile"
+        $cmd > "$module_load_logfile" 2>&1
+        cat "$module_load_logfile" >> "$logfile"
+        module li >>"$logfile" 2>&1
+
+        # check if module was loaded and did not report errors during loading (grep -F: a '.' in the version is no wildcard)
+        if module li -t 2>&1 | grep -qF -- "$m" && ! grep -q ERROR "$module_load_logfile" ; then
+            mpi_test_suite_log="$mpi_test_suite_dir/$(echo "$m" | sed -e 's/\//_/g').log"
+            ( cd "$mpi_test_suite_dir" || exit 1    # subshell: never build outside the test suite dir, keep the script's cwd
+              make clean >"$mpi_test_suite_log" 2>&1
+              make -j "$(grep -c ^processor /proc/cpuinfo)" >>"$mpi_test_suite_log" 2>&1
+              mpirun -np "$(grep -c ^processor /proc/cpuinfo)" mpi_test_suite -t All,^io,^one-sided >>"$mpi_test_suite_log" 2>&1 )
+            if grep -q "Number of failed tests:0" "$mpi_test_suite_log" 2>/dev/null ; then
+                echo -en "${IGreen}passed test suite${Color_Off} ... "
+                echo -e "passed test suite" >> "$logfile"
+            else
+                echo -en "${IRed}Testsuite failed${Color_Off} "
+                echo "ERROR: Testsuite failed for module '$m'" >> "$logfile"
+                failed_modules+=("$m_original")
+            fi
+
+            cmd="module rm $m"
+            echo "$cmd" >> "$logfile"
+            $cmd > "$module_rm_logfile" 2>&1
+            cat "$module_rm_logfile" >> "$logfile"
+            module li >>"$logfile" 2>&1
+
+            # check if module was unloaded
+            if module li -t 2>&1 | grep -qF -- "$m"; then
+                echo -e "${IRed}unloading failed${Color_Off}"
+                echo "ERROR: unloading module '$m' failed" >> "$logfile"
+                [[ " ${failed_modules[*]} " == *" $m_original "* ]] || failed_modules+=("$m_original") # list each module only once
+            else
+                echo -e "${IGreen}success${Color_Off}"
+                echo "SUCCESS" >> "$logfile"
+            fi
+
+        else
+            echo -e "${IRed}loading failed${Color_Off}"
+            echo "ERROR: loading module '$m' failed" >> "$logfile"
+            failed_modules+=("$m_original")
+        fi
+
+        # clean up module environment
+        cmd="module purge"
+        echo "$cmd" >> "$logfile"
+        $cmd >>"$logfile" 2>&1
+        module li >>"$logfile" 2>&1
+        echo "Resetting environment ..." >>"$logfile"
+
+        # Reset the complete environment manually to overcome problems with
+        # inconsistent internal caches of the module command after module purge.
+        source "$module_clean_env_file" 2>/dev/null
+
+        # clean up intermediate files
+        rm -f "$module_load_logfile"
+        rm -f "$module_rm_logfile"
+
+        echo >>"$logfile" 2>&1
+    fi
+done
+
+# clean up file storing the initial environment
+rm -f "$module_clean_env_file"
+rm -rf "$tmpdir"
+
+
+echo "----------------------------------------"
+echo "Summary of failed modules (${#failed_modules[@]}):"
+echo "----------------------------------------"
+for m in "${failed_modules[@]}"; do
+    echo "$m"
+done
+echo "----------------------------------------"
+
diff --git a/module_stats.py b/module_stats.py
index 5bd8eac..4c8a93f 100755
--- a/module_stats.py
+++ b/module_stats.py
@@ -24,6 +24,7 @@ parser.add_option("--logdir", metavar="DIR", dest="logdir")
 parser.add_option("--startdate", metavar="YYYY-MM-DD", dest="startdate")
 parser.add_option("--enddate", metavar="YYYY-MM-DD", dest="enddate")
 parser.add_option("--nohpc", action="store_true", default=False, dest="nohpc", help="Exclude hpc* accounts from stats")
+parser.add_option("--printusers", action="store_true", default=False, dest="printusers", help="Print detailed user lists for module usage.")
 
 (options, args) = parser.parse_args()
 if options.logdir : logdir = options.logdir
@@ -76,8 +77,10 @@ print "{0:60s}{1:>8s} {2:>8s}".format('module name', '# uses', '# users')
 print "-"*78
 for (v,m) in sorted( ((v,k) for k,v in modulestats.iteritems()), reverse=True) :
     print "{0:60s}{1:>8d} {2:>8d}".format(m, v, len(moduleusers[m].keys()))
-    #print moduleusers[m]
+    if options.printusers :
+        print moduleusers[m]
 print "-"*78
 print "{0:60s}{1:>8d} {2:>8d}".format('total', total_modules, len(total_users))
-#print sorted([ (u) for u in total_users ])
+if options.printusers :
+    print sorted(total_users)
 print "-"*78
diff --git a/packages/compiler/gcc/gcc-4.7.2 b/packages/compiler/gcc/gcc-4.7.2
index df3b9cd..f4ca0c1 100755
--- a/packages/compiler/gcc/gcc-4.7.2
+++ b/packages/compiler/gcc/gcc-4.7.2
@@ -34,7 +34,10 @@ src_unpack() {
 }
 
 src_pretest() {
-    make check
+    tar xfz "${SRC_POOL}/dejagnu-1.5.tar.gz"
+    DEJAGNUDIR=$PWD/dejagnu-1.5
+    cd objdir || return 1   # never run the test suite outside the build directory
+    DEJAGNULIBS=$DEJAGNUDIR/lib make -k check
 }
 
 # Other interesting configure options:
diff --git a/packages/performance/vampirtrace/vampirtrace-5.14.1 b/packages/performance/vampirtrace/vampirtrace-5.14.1
index 291bfea..2184f70 100755
--- a/packages/performance/vampirtrace/vampirtrace-5.14.1
+++ b/packages/performance/vampirtrace/vampirtrace-5.14.1
@@ -24,7 +24,11 @@ enable_cuda=${enable_cuda:=0}
 
 if [[ $PLATFORM == "hermit1" ]] ; then
     CONFIGURE_OPTS=" \
+        --build=x86_64-unknown-linux-gnu \
+        --host=x86_64-cray-linux-gnu \
+        --without-cross-prefix \
         --with-platform=crayxe \
+        --disable-exectrace \
     "
 else
     CONFIGURE_OPTS=" \
diff --git a/packages/performance/vampirtrace/vampirtrace-5.14.3-gpu1 b/packages/performance/vampirtrace/vampirtrace-5.14.3-gpu1
index cbaa3ec..2b20d99 100755
--- a/packages/performance/vampirtrace/vampirtrace-5.14.3-gpu1
+++ b/packages/performance/vampirtrace/vampirtrace-5.14.3-gpu1
@@ -49,14 +49,19 @@ fi
 
 # use CUDA
 if [ $enable_cuda != 0 ] ; then
-    module load cuda
+    if [[ $PLATFORM == "hermit1" ]] ; then
+        module load craype-accel-nvidia35
+    else
+        module load cuda
+    fi
+    #--with-cuda-inc-dir=$CUDA_DIR/include \
+    #--with-cuda-lib-dir=$CUDA_DIR/lib \
+    #--with-cudart-lib-dir=$CUDA_DIR/lib64 \
+    #--with-cudart-dir=$CUDA_DIR \
     CUDA_DIR=${CUDA_DIR:=$(dirname $(dirname $(which nvcc)))}
     CONFIGURE_OPTS="${CONFIGURE_OPTS} \
         --with-cuda-dir=$CUDA_DIR \
-        --with-cuda-inc-dir=$CUDA_DIR/include \
-        --with-cuda-lib-dir=$CUDA_DIR/lib \
-        --with-cudart-dir=$CUDA_DIR \
-        --with-cudart-lib-dir=$CUDA_DIR/lib64 \
+        --with-cupti-dir=$CUDA_DIR/extras/CUPTI
     "
 fi
 