#!/bin/bash -l
#
# Copyright (c) 2022 Christoph Niethammer
#
# Script checking an lmod based module environment for problems during
# module loading/unloading.
#
# For every name printed by 'module -t spider MODULE_PATTERN' the script
#   1. reads the indented lines of 'module spider NAME' and treats each of
#      them as one set of modules to load first,
#   2. loads the module and verifies that it shows up in 'module -t li'
#      and that the load output does not contain "ERROR",
#   3. removes the module again and verifies that it is gone,
#   4. purges and restores the module collection saved at startup.
# Modules failing step 2 or 3 are listed in a summary at the end.
#
# Usage:
#   check_modules [MODULE_PATTERN]... [--exclude PATTERN]...
#
# Environment:
#   LOGDIR   directory receiving check_modules.log (default: $PWD)
#
# NOTE: the shebang uses a login shell (-l); presumably this is what makes
# the 'module' shell function available - confirm for your site.
# NOTE: 'set -e' is deliberately not used: failing 'module' commands are
# exactly what this script has to observe and report.
#

# Print a short usage message to stdout.
print_usage() {
  echo "Script checking the module environment for problems during module loading/unloading"
  echo "usage: $0 [MODULE_PATTERN] [--exclude PATTERN]"
}

# Command line options:
declare -a modulespecs=()      # check only subset of modules
declare -a exclude_patterns=() # exclude modules with given pattern, e.g. nightly builds
declare -a arg_words=()        # scratch array used while parsing

while [[ $# -gt 0 ]]; do
  case "$1" in
    --exclude-pattern | --exclude)
      # Without this guard 'shift 2' fails when the value is missing, the
      # positional parameters stay untouched and the loop never terminates.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: option '$1' requires an argument" >&2
        print_usage >&2
        exit 2
      fi
      # Split on whitespace (as the historic unquoted expansion did) but
      # without pathname expansion of characters like '*'.
      arg_words=()
      read -r -a arg_words <<<"$2"
      exclude_patterns+=("${arg_words[@]}")
      shift 2
      ;;
    -h | --help)
      print_usage
      exit 0
      ;;
    *)
      arg_words=()
      read -r -a arg_words <<<"$1"
      modulespecs+=("${arg_words[@]}")
      shift
      ;;
  esac
done

# definitions for color output if outputting to tty (empty otherwise)
Color_Off=''
IGreen=''
IRed=''
IMagenta=''
if [[ -t 1 ]]; then
  Color_Off='\e[0m'   # Text Reset
  IGreen='\e[0;92m'   # Intense Green
  IRed='\e[0;91m'     # Intense Red
  IMagenta='\e[0;95m' # magenta
fi

# intermediate files, logfiles
LOGDIR=${LOGDIR:-$PWD}
mkdir -p -- "$LOGDIR"

# Private, unpredictable scratch directory; removed on every exit path.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/check_modules-${USER:-$(id -un)}.XXXXXX") || {
  echo "ERROR: cannot create temporary directory" >&2
  exit 1
}
# Remove the scratch directory including all intermediate files.
cleanup() {
  rm -rf -- "${tmpdir:?}"
}
trap cleanup EXIT

logfile="$LOGDIR/check_modules.log"               # logfile with detailed information
module_load_logfile="$tmpdir/.module_load.log"    # output of 'module load' commands
module_rm_logfile="$tmpdir/.module_rm.log"        # output of 'module rm' commands
module_clean_env_file="$tmpdir/.module_clean_env" # original environment

# save the original environment
set >"$module_clean_env_file"
module save check_modules

# list of all failed modules, one entry per "MODULE [DEPENDENCIES]"
failed_modules=()

# Report header: shown on the terminal and used to start a fresh logfile.
{
  echo "--------------------"
  echo "Module environment check"
  echo "--------------------"
  echo "Date: $(date)"
  echo "Host: $(/bin/hostname)"
  echo "USER: $USER"
  echo "Logfile: $logfile"
  echo "--------------------"
  echo "Modulespecs: ${modulespecs[*]}"
  echo "Exclude: ${exclude_patterns[*]}"
  echo "--------------------"
} | tee "$logfile"

# The full shell environment is only recorded in the logfile.
{
  echo "Environment:"
  cat "$module_clean_env_file"
  echo "--------------------"
} >>"$logfile"

#######################################
# Load/unload one module on top of every dependency line reported by
# 'module spider' and record failures.
# Globals:   logfile, module_load_logfile, module_rm_logfile (read),
#            IRed, IGreen, Color_Off (read), failed_modules (appended)
# Arguments: $1 - module name as printed by 'module -t spider'
# Outputs:   progress on stdout, details appended to $logfile
#######################################
check_module() {
  local m_original=$1
  local m dep
  local -a deps=() dep_modules=()

  # Remove aliases e.g. (default)
  m=$(printf '%s\n' "$m_original" | sed -e 's/(.*)//')

  echo "Checking $m_original ... "
  echo "Checking $m_original ... " >>"$logfile"

  # Every line of the spider output starting with a blank is taken as one
  # set of modules that has to be loaded first.
  readarray deps < <(module spider "$m_original" |& grep '^ ')

  for dep in "${deps[@]}"; do
    dep=${dep//[$'\t\r\n']/}
    # A line may name several modules: split it into words without globbing.
    dep_modules=()
    read -r -a dep_modules <<<"$dep"

    printf '%s' "... with deps $dep ..."
    echo "... with deps $dep ..." >>"$logfile"
    module load "${dep_modules[@]}" >>"$logfile" 2>&1

    echo "module load $m" >>"$logfile"
    module load "$m" >"$module_load_logfile" 2>&1
    cat "$module_load_logfile" >>"$logfile"
    module li >>"$logfile" 2>&1

    # check if module was loaded and did not report errors during loading;
    # -F: the module name is a literal string, not a regular expression
    if module -t li 2>&1 | grep -F -- "$m" >/dev/null &&
      ! grep ERROR "$module_load_logfile" >/dev/null; then
      echo "module rm $m" >>"$logfile"
      module rm "$m" >"$module_rm_logfile" 2>&1
      cat "$module_rm_logfile" >>"$logfile"
      module li >>"$logfile" 2>&1

      # check if module was unloaded
      if module -t li 2>&1 | grep -F -- "$m" >/dev/null; then
        printf '%b\n' "${IRed}unloading failed${Color_Off}"
        echo "ERROR: unloading module '$m' failed" >>"$logfile"
        failed_modules+=("$m_original [$dep]")
      else
        printf '%b\n' "${IGreen}success${Color_Off}"
        echo "SUCCESS" >>"$logfile"
      fi
    else
      printf '%b\n' "${IRed}loading failed${Color_Off}"
      echo "ERROR: loading module '$m' failed" >>"$logfile"
      failed_modules+=("$m [$dep]")
    fi
    module unload "${dep_modules[@]}" >>"$logfile" 2>&1

    # clean up module environment
    echo "module purge" >>"$logfile"
    module purge >>"$logfile" 2>&1
    module li >>"$logfile" 2>&1

    echo "Resetting environment ..." >>"$logfile"
    # Reset the complete environment manually to overcome problems with
    # inconsistent internal caches of the module command after module purge.
    #source $module_clean_env_file 2>/dev/null
    module restore check_modules 2>/dev/null

    # clean up intermediate files
    rm -f -- "$module_load_logfile" "$module_rm_logfile"
    echo >>"$logfile" 2>&1
  done
}

for modulespec in "${modulespecs[@]}"; do
  # Split the spider output into whitespace separated words; 'read' returns
  # non-zero at end of input, which is expected here.
  candidates=()
  read -r -d '' -a candidates < <(module -t spider "$modulespec" 2>&1)

  for m_original in "${candidates[@]}"; do
    excluded_by=''
    for exclude_pattern in "${exclude_patterns[@]}"; do
      # exclude patterns are matched as literal substrings
      if [[ "$m_original" == *"$exclude_pattern"* ]]; then
        excluded_by=$exclude_pattern
        break
      fi
    done
    if [[ -n "$excluded_by" ]]; then
      printf 'Skipping %s (matches %s) ... %b skipped%b\n' \
        "$m_original" "$excluded_by" "$IMagenta" "$Color_Off"
      continue
    fi

    # skip any non module line in output
    [[ "$m_original" =~ ^[A-Za-z] ]] || continue

    check_module "$m_original"
  done
done

# The file storing the initial environment and the scratch directory are
# removed by the EXIT trap installed above.

echo "----------------------------------------"
echo "Summary of failed modules (${#failed_modules[@]}):"
echo "----------------------------------------"
for m in "${failed_modules[@]}"; do
  echo "$m"
done
echo "----------------------------------------"