#!/bin/sh
#------------------------------------------------------------------------------
# =========                 |
# \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
#  \\    /   O peration     |
#   \\  /    A nd           | Copyright held by original author
#    \\/     M anipulation  |
#-------------------------------------------------------------------------------
# License
#     This file is part of OpenFOAM.
#
#     OpenFOAM is free software; you can redistribute it and/or modify it
#     under the terms of the GNU General Public License as published by the
#     Free Software Foundation; either version 2 of the License, or (at your
#     option) any later version.
#
#     OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
#     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
#     for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with OpenFOAM; if not, write to the Free Software Foundation,
#     Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# Script
#     foamCheckJobs
#
# Description
#     Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
#     stateFile contains one line per job with the state of its process.
#     Format:
#         machine.pid state
#
#     where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
#     (first three are from foamProcessInfo, others from jobInfo files)
#     (PEND is special state from when user has submitted but no jobInfo
#     file yet. Not supported by this script yet)
#
#     Job lists are kept as whitespace-separated words, so FOAM_JOB_DIR and
#     the job file names must not contain whitespace.
#
#------------------------------------------------------------------------------

PROGNAME=${0##*/}

#-------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#-------------------------------------------------------------------------------

#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${PROGNAME}
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out


#-------------------------------------------------------------------------------
#
# Functions
#
#-------------------------------------------------------------------------------

# say text
#     Prints text followed by a newline.
#     printf is used because 'echo -e' and '\c' are not portable across
#     /bin/sh implementations (dash prints a literal '-e').
say()
{
    printf '%s\n' "$*"
}


# prompt text
#     Prints text without a trailing newline, for interactive questions.
prompt()
{
    printf '%s' "$*"
}


# getRawEntry dictionary entry
#     Prints value of dictionary entry.
#     Lines starting with '//' are ignored. The entry name may be preceded
#     by whitespace and must be followed by whitespace.
getRawEntry()
{
    grep -v '^//' "$1" \
      | grep "^[[:space:]]*$2[[:space:]]" \
      | sed -e "s/^[[:space:]]*$2[[:space:]]*//"
}


# getEntry dictionary entry
#     Like getRawEntry but strips " and ending ';'
getEntry()
{
    getRawEntry "$1" "$2" | sed -e 's/^"//' -e 's/;$//' -e 's/"$//'
}


# notEmpty directory
#     Returns 0 if directory contains files/directories, 1 otherwise
#     (hidden entries are not counted since plain 'ls' does not list them).
notEmpty()
{
    [ -n "$(ls -- "$1")" ]
}


# jobMachine jobName
#     Prints the machine part of a 'machine.pid' job name. Any leading
#     directory is removed first.
jobMachine()
{
    printf '%s\n' "${1##*/}" | sed -e 's/\.[0-9][0-9]*$//'
}


# jobPid jobName
#     Prints the pid part of a 'machine.pid' job name. Any leading
#     directory is removed first.
jobPid()
{
    printf '%s\n' "${1##*/}" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/'
}


# dayDiff date1 date2
#     Prints number of days between the two dates (date1 - date2), counting
#     every year as 365 days.
#     Eg. dayDiff "Jan 10 2002" "Dec 28 2001"
#     ==> 13
#     Prints 0 if 'date -d' is not supported or a date cannot be parsed.
dayDiff()
{
    if date -d "$1" > /dev/null 2>&1 && date -d "$2" > /dev/null 2>&1
    then
        year1=$(date -d "$1" '+%Y')
        year2=$(date -d "$2" '+%Y')
        day1=$(date -d "$1" '+%j')
        day2=$(date -d "$2" '+%j')

        # expr is used on purpose: '+%j' is zero padded (e.g. 008) and
        # shell arithmetic would try to read that as an octal number.
        nYears=$(expr "$year1" - "$year2")
        expr 365 \* "$nYears" + "$day1" - "$day2"
    else
        #- option '-d' on date not supported or date not understood. Give up.
        echo "0"
    fi
}
#dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"


# getAllJobs jobInfoDirectory
#     Prints list of all jobs in directory (e.g. runningJobs/), one per line:
#     first the job files (with their path), then the slave jobs (name only).
#     Also handles 'slaves' entries in jobInfo:
#          slaves 1 ( penfold.23766 );
#     The file names are expanded here so that the caller works on one
#     snapshot of the directory for all of its passes.
getAllJobs()
{
    notEmpty "$1" || return 0

    slaveJobs=''
    for f in "$1"/*
    do
        printf '%s\n' "$f"
        if line=$(grep '^[[:space:]]*slaves' "$f" 2>/dev/null)
        then
            # -n/p: only use lines that really have a '( ... )' list
            slaves=$(printf '%s\n' "$line" | sed -n -e 's/.*(\(.*\)).*/\1/p')
            slaveJobs="$slaveJobs $slaves"
        fi
    done

    # Unquoted on purpose: split the collected lists into single job names
    for f in $slaveJobs
    do
        printf '%s\n' "$f"
    done
}


# releaseLock jobId lockFile
#     Releases lock on jobId by moving lockFile (if it is a file) to
#     finishedJobs. Returns 1 if the lock file could not be moved.
releaseLock ()
{
    if [ -f "$2" ]
    then
        #- move lock to finishedJobs
        if ! mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/"
        then
            say "$PROGNAME : could not move $2 to finishedJobs/" >&2
            return 1
        fi
    fi
    say "Lock on job $1 released."
}


# printUsage
#     Prints the usage message.
printUsage()
{
cat << LABEL
Usage: $PROGNAME [stateFile]

This program checks all the locks in the license directory to see if
their processes are still running. Processes will not release their
lock if they exit abnormally. This program will try to obtain process
information on the machine the process ran on and release the lock
if the program is no longer running.

Requirements: the environment variable FOAM_JOB_DIR needs to point to the
license directory and all machines have to be reachable using ssh.

The output from checking all running jobs is collected in an optional file.

FILES:
    \$FOAM_JOB_DIR/runningJobs   locks for running processes
                 /finishedJobs   ,,        finished processes
LABEL
}


#-------------------------------------------------------------------------------
#
# Main
#
#-------------------------------------------------------------------------------

#- A request for help is never meant as the name of the state file
case "$1" in
-h | -help | --help)
    printUsage
    exit 0
    ;;
esac

#- Check a few things

if [ -z "$FOAM_JOB_DIR" ]
then
    say "$PROGNAME : FOAM_JOB_DIR environment variable not set."
    say "This should point to your central license directory."
    exit 1
fi

if [ ! -d "$FOAM_JOB_DIR" ]
then
    say "$PROGNAME : The license directory accoring to FOAM_JOB_DIR is not valid."
    say "FOAM_JOB_DIR: $FOAM_JOB_DIR"
    exit 1
fi
if [ ! -d "$FOAM_JOB_DIR/runningJobs" ] || [ ! -d "$FOAM_JOB_DIR/finishedJobs" ]
then
    say "$PROGNAME : The license directory according to FOAM_JOB_DIR is not valid."
    say "FOAM_JOB_DIR: $FOAM_JOB_DIR"
    exit 1
fi

if [ $# -eq 1 ]
then
    STATEFILE=$1
elif [ $# -eq 0 ]
then
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
else
    printUsage
    exit 1
fi

#- obtain rsh method
RSH='ssh'
say "Using remote shell type : $RSH"

say ""
say "Collecting information on jobs in"
say "    $FOAM_JOB_DIR"
say ""

#- Collect machine names (each one once) into $machines
#  Also handles 'slaves' entry in jobInfo:
RUNJOBS=$(getAllJobs "$FOAM_JOB_DIR/runningJobs")

machines=''
for f in $RUNJOBS
do
    machine=$(jobMachine "$f")
    [ -n "$machine" ] || continue

    # Compare whole names so that e.g. node1 is not hidden by node10
    case " $machines " in
    *" $machine "*)
        ;;
    *)
        machines="$machines $machine"
        ;;
    esac
done

say "Found machines:"
for machine in $machines
do
    say "$machine"
done
say ""


#- Collect process info on all machines, one file per machine

mkdir -p "$MACHDIR" || exit 1
thisHost=$(hostname)
for machine in $machines
do
    machFile=$MACHDIR/$machine
    rm -f -- "$machFile"
    say "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]
    then
        say "    foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
        rc=$?
    else
        say "    $RSH $machine foamProcessInfo $machFile"
        # stdin from /dev/null: the remote shell must not swallow the
        # answers to the questions asked further down.
        # $RSH is unquoted so that it may hold options as well.
        $RSH "$machine" foamProcessInfo "$machFile" </dev/null >/dev/null 2>&1
        rc=$?
    fi
    if [ "$rc" -ne 0 ] || [ ! -s "$machFile" ]
    then
        say "** Failed collecting process information on $machine."
        say "Check $machFile and run foamProcessInfo by hand"
        rm -f -- "$machFile"
    else
        say "Succesfully collected information in $machFile ..."
    fi
done
say ""


#- Construct state for runningJobs; move non running jobs to finishedJobs

releaseAll=''
rm -f -- "$STATEFILE"
for f in $RUNJOBS
do
    machinePid=${f##*/}
    machine=$(jobMachine "$f")
    pid=$(jobPid "$f")

    #- Jobs on machines without process information are left alone
    machFile=$MACHDIR/$machine
    if [ ! -f "$machFile" ] || [ ! -r "$machFile" ]
    then
        continue
    fi

    entry=$(grep "^$pid " "$machFile" 2>/dev/null)
    if [ -n "$entry" ]
    then
        #- process still exists: record the state from foamProcessInfo
        state=$(printf '%s\n' "$entry" | awk '{print $2}')
        say "$machinePid $state" >> "$STATEFILE"
    elif [ -n "$releaseAll" ]
    then
        releaseLock "$machinePid" "$f"
    else
        prompt "Job $machinePid seems to be no longer running. Release lock? (y/a)"
        read -r answ
        #- an empty answer counts as 'y'
        case "${answ:-y}" in
        y)
            releaseLock "$machinePid" "$f"
            ;;
        a)
            releaseAll='yes'
            releaseLock "$machinePid" "$f"
            ;;
        *)
            state='OTHR'
            say "$machinePid $state" >> "$STATEFILE"
            ;;
        esac
    fi
done


#- Collect old jobs in finishedJobs (not the directory itself)

OLDFILES=''
for f in $(find "$FOAM_JOB_DIR/finishedJobs" ! -type d -mtime "+$NDAYSLIMIT" -print)
do
    OLDFILES="$OLDFILES $f"
done

#- Construct state for finishedJobs and check on date of files.

if notEmpty "$FOAM_JOB_DIR/finishedJobs"
then
    #- C locale so that 'date -d' can read the month name back
    dateNow=$(LC_ALL=C date '+%b %d %Y')
    for f in "$FOAM_JOB_DIR"/finishedJobs/*
    do
        #- only non-empty job files carry information
        if [ ! -f "$f" ] || [ ! -s "$f" ]
        then
            continue
        fi

        machinePid=${f##*/}

        end=$(getEntry "$f" endDate)
        if [ -z "$end" ]
        then
            state='ABRT'
        else
            state='FINI'
            nDaysOld=$(dayDiff "$dateNow" "$end")
            if [ "${nDaysOld:-0}" -gt "$NDAYSLIMIT" ]
            then
                #- do not list a file twice (it may be old by mtime as well)
                case " $OLDFILES " in
                *" $f "*)
                    ;;
                *)
                    OLDFILES="$OLDFILES $f"
                    ;;
                esac
            fi
        fi

        say "$machinePid $state" >> "$STATEFILE"
    done
fi


#- Remove old locks
#  Unquoted on purpose: one positional parameter per old file
set -- $OLDFILES
if [ $# -gt 0 ]
then
    say "You seem to have $# locks older than $NDAYSLIMIT days in finishedJobs/"
    prompt "Do you want to remove these? (y)"
    read -r answ
    #- an empty answer counts as 'y'
    if [ "${answ:-y}" = 'y' ]
    then
        rm -f -- "$@"
    fi
fi


rm -rf -- "${MACHDIR:?}"

say ""
say "Updated stateFile:"
say "    $STATEFILE"
say ""

#------------------------------------------------------------------------------