#!/bin/sh
#------------------------------------------------------------------------------
# =========                 |
# \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
#  \\    /   O peration     |
#   \\  /    A nd           | Copyright held by original author
#    \\/     M anipulation  |
#-------------------------------------------------------------------------------
# License
#     This file is part of OpenFOAM.
#
#     OpenFOAM is free software; you can redistribute it and/or modify it
#     under the terms of the GNU General Public License as published by the
#     Free Software Foundation; either version 2 of the License, or (at your
#     option) any later version.
#
#     OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
#     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
#     for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with OpenFOAM; if not, write to the Free Software Foundation,
#     Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# Script
#     foamCheckJobs
#
# Description
#     Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
#     stateFile contains per pid information on state of process. Format:
#         pid state command
#
#     where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
#     (first three are from foamProcessInfo, others from jobInfo files)
#     (PEND is special state from when user has submitted but no jobInfo
#     file yet. Not supported by this script yet)
#
#------------------------------------------------------------------------------

# Name this script was invoked as (directory part stripped). Used in messages
# and to derive the names of the work file and work directory.
PROGNAME=${0##*/}

#-------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#-------------------------------------------------------------------------------

#- work file
#  Created with mktemp so that the name is not predictable inside the
#  world-writable /tmp; falls back to the legacy pid-based name when mktemp
#  is not available. The trap removes it on every exit path.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/${PROGNAME}.XXXXXX" 2>/dev/null) \
    || TMPFILE=/tmp/${PROGNAME}$$.tmp
trap 'rm -f -- "$TMPFILE"' 0

#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${PROGNAME}

#- state file used when none is given on the command line or in $STATEFILE
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out


# Select an echo command that interprets backslash escapes; the prompts
# further down rely on '\c' to suppress the trailing newline.
# The behaviour of the running shell's echo is probed instead of guessing
# from the operating system name: an echo that understands '-e' prints just
# "x" here, any other echo prints the "-e" as well and is then used without
# the option.
case "$(echo -e 'x\c')" in
x)
    ECHO='echo -e'
    ;;
*)
    ECHO='echo'
    ;;
esac


#-------------------------------------------------------------------------------
#
# Functions
#
#-------------------------------------------------------------------------------

# getRawEntry dictionary entry
#
# Prints the raw value of <entry> in the file <dictionary>: for every line
# that does not start with '//' and consists of optional leading whitespace,
# the entry name and at least one blank or tab, the remainder of the line is
# printed (surrounding quotes and the trailing ';' are kept).
# Prints nothing when the entry is not present.
# Note: <entry> is inserted into a regular expression, so it must not contain
# regular expression special characters.
getRawEntry() {
    grep -v '^//' "$1" \
        | sed -n -e "s/^[[:space:]]*$2[[:space:]][[:space:]]*//p"
}

# getEntry dictionary entry
#
# Like getRawEntry but strips the surrounding '"' and the ending ';'.
# Trailing whitespace (including a carriage return) is removed first so that
# the ';' is still recognised at the end of the value.
getEntry() {
    getRawEntry "$1" "$2" \
        | sed -e 's/[[:space:]]*$//' -e 's/^"//' -e 's/;$//' -e 's/"$//'
}

# notEmpty directory
#
# Returns 0 if directory contains files/directories (hidden entries are not
# counted), 1 otherwise. Also returns 1, after the error message from ls,
# when the directory cannot be listed or when the argument is empty; the
# argument is quoted so that an empty value can never make ls list the
# current directory instead.
notEmpty() {
    [ -n "$(ls -- "$1")" ]
}

# dayDiff <date string 1> <date string 2>
#
# Prints number of days between the two, counted from <date string 2> to
# <date string 1> (negative when the first date is the earlier one).
# Eg. dayDiff "Jan 10 2000" "Dec 28 1999"
# ==> 13
#
# Both dates are converted to seconds since the epoch in UTC, so leap years
# are counted and a daylight-saving change between the dates cannot shorten
# or lengthen a day. Prints 0 when 'date -d' is not supported or when either
# date string cannot be parsed.
dayDiff() {
    if secs1=$(date -u -d "$1" '+%s' 2>/dev/null) \
        && secs2=$(date -u -d "$2" '+%s' 2>/dev/null)
    then
        echo $(( ($secs1 - $secs2) / 86400 ))
    else
        #- option '-d' on date not supported (or unparsable date). Give up.
        echo "0"
    fi
}
#dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"


# getAllJobs jobInfoDirectory
#
# Prints list of all jobs in directory (e.g. runningJobs/) on one line,
# separated by whitespace: first the path of every entry in the directory,
# then the job names found in 'slaves' entries of those jobInfo files, e.g.
#     slaves 1 ( penfold.23766 );
# adds 'penfold.23766' (a plain job name, not a path).
# Prints an empty line when the directory has no entries.
# The directory listing is expanded here, so the output contains real file
# names rather than a glob pattern. Because the list is whitespace separated,
# paths that contain whitespace cannot be represented.
getAllJobs() {
    jobs=''
    slaveJobs=''
    if notEmpty "$1"; then
        for f in "$1"/*
        do
            jobs="$jobs $f"
            # Unreadable entries and sub-directories are simply skipped here
            if line=$(grep '^[ ]*slaves' "$f" 2>/dev/null); then
                # Keep only the text between the parentheses
                slaveJobs="$slaveJobs $(echo "$line" | sed -e 's/.*(\(.*\)).*/\1/')"
            fi
        done
    fi
    echo "${jobs# }${slaveJobs}"
}

# releaseLock jobId lockFile
#
# Releases lock on jobId: when lockFile is an existing regular file it is
# moved to $FOAM_JOB_DIR/finishedJobs/. Jobs without a lock file of their own
# (lockFile is not a file) are only reported.
# Prints a confirmation on stdout and returns 0; when the lock file cannot be
# moved a message is printed on stderr and 1 is returned.
releaseLock () {
    if [ -f "$2" ]; then
        #- move lock to finishedJobs
        if ! mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/"; then
            $ECHO "Failed to release lock on job $1." >&2
            return 1
        fi
    fi
    $ECHO "Lock on job $1 released."
}


# printUsage
#
# Prints the usage message on stdout. $PROGNAME is expanded inside the
# here-document; the escaped \$FOAM_JOB_DIR is printed literally.
printUsage() {
cat << LABEL
Usage: $PROGNAME [stateFile]

This program checks all the locks in the license directory to see if
their processes are still running. Processes will not release their
lock if they exit abnormally. This program will try to obtain process
information on the machine the process ran on and release the lock
if the program is no longer running.

Requirements: the environment variable FOAM_JOB_DIR needs to point to the
license directory and all machines have to be reachable using ssh.

The output from checking all running jobs is collected in an optional
file.

FILES:
    \$FOAM_JOB_DIR/runningJobs      locks for running processes
                 /finishedJobs        ,,    finished processes
LABEL
}


#-------------------------------------------------------------------------------
#
# Main
#
#-------------------------------------------------------------------------------

#- Check a few things
#  FOAM_JOB_DIR has to be set and has to be a directory that contains both
#  runningJobs/ and finishedJobs/. Otherwise a diagnostic is written to
#  stderr and the script exits with status 1.

if [ -z "$FOAM_JOB_DIR" ]; then
    $ECHO "$PROGNAME : FOAM_JOB_DIR environment variable not set." >&2
    $ECHO "This should point to your central license directory." >&2
    exit 1
fi

if [ ! -d "$FOAM_JOB_DIR" ] \
    || [ ! -d "$FOAM_JOB_DIR/runningJobs" ] \
    || [ ! -d "$FOAM_JOB_DIR/finishedJobs" ]
then
    $ECHO "$PROGNAME : The license directory according to FOAM_JOB_DIR is not valid." >&2
    $ECHO "FOAM_JOB_DIR: $FOAM_JOB_DIR" >&2
    exit 1
fi


#- Select the state file
#  One argument: that file. No argument: $STATEFILE from the environment
#  or, when that is unset or empty, $DEFSTATEFILE. A help option prints the
#  usage and exits with status 0; any other argument count prints the usage
#  on stderr and exits with status 1.
case $# in
0)
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
    ;;
1)
    case "$1" in
    -h | -help | --help)
        printUsage
        exit 0
        ;;
    esac
    STATEFILE=$1
    ;;
*)
    printUsage >&2
    exit 1
    ;;
esac

#- obtain rsh method
#  Command used to run foamProcessInfo on the other machines
RSH='ssh'
echo "Using remote shell type : $RSH"

echo ""
echo "Collecting information on jobs in"
echo " $FOAM_JOB_DIR"
echo ""


#- Collect machine names into $TMPFILE
#  Also handles 'slaves' entry in jobInfo:
#  every job is named <machine>.<pid>; each distinct machine name is written
#  to $TMPFILE once, one name per line.

rm -f -- "$TMPFILE"
: > "$TMPFILE"
RUNJOBS=$(getAllJobs "$FOAM_JOB_DIR/runningJobs")
# $RUNJOBS is a whitespace separated list; word splitting (and pathname
# expansion) of the unquoted variable is intended here.
for f in $RUNJOBS
do
    machinePid=$(basename "$f")
    machine=$(printf '%s\n' "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')

    # Whole-line fixed-string comparison: a substring match would drop
    # 'node1' once 'node10' has been listed.
    if ! grep -q -x -F -e "$machine" "$TMPFILE" 2>/dev/null; then
        printf '%s\n' "$machine" >> "$TMPFILE"
    fi
done
$ECHO "Found machines:"
cat "$TMPFILE"
$ECHO ""



#- Collect process info on all machines, one file per machine
#  For every machine listed in $TMPFILE, foamProcessInfo is run (directly on
#  this host, through $RSH elsewhere) and asked to write $MACHDIR/<machine>.
#  The file is removed again when the command fails or leaves it empty, so
#  that a machine without a file is later treated as "state unknown".

mkdir -p "$MACHDIR"
thisHost=$(hostname)
# The list is read on file descriptor 3 so that the commands inside the loop
# keep the script's own standard input.
while IFS= read -r machine <&3
do
    # An empty line ends the list
    if [ -z "$machine" ]; then
        break
    fi

    machFile=$MACHDIR/$machine
    rm -f -- "$machFile"
    $ECHO "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]; then
        $ECHO " foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
        status=$?
    else
        $ECHO " $RSH $machine foamProcessInfo $machFile"
        $RSH "$machine" foamProcessInfo "$machFile" >/dev/null 2>&1
        status=$?
    fi
    if [ "$status" -ne 0 ] || [ ! -s "$machFile" ]; then
        $ECHO "** Failed collecting process information on $machine."
        $ECHO "Check $machFile and run foamProcessInfo by hand"
        rm -f -- "$machFile"
    else
        $ECHO "Successfully collected information in $machFile ..."
    fi
done 3< "$TMPFILE"
$ECHO ""


#- Construct state for runningJobs; move non running jobs to finishedJobs
#  For every running job whose machine delivered process information:
#   - pid listed in the machine file: its state (second column) is written
#     to $STATEFILE;
#   - pid not listed: the user is asked whether the lock may be released
#     ('y' or just return: this job, 'a': this and all following jobs,
#     anything else: keep the lock and record the job as 'OTHR').
#  Jobs on machines without process information are left untouched.

releaseAll=''
rm -f -- "$STATEFILE"
# $RUNJOBS is a whitespace separated list; splitting is intended.
for f in $RUNJOBS
do
    machinePid=$(basename "$f")
    machine=$(printf '%s\n' "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')
    pid=$(printf '%s\n' "$machinePid" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/')

    machFile=$MACHDIR/$machine
    if [ ! -r "$machFile" ]; then
        continue
    fi

    entry=$(grep "^$pid " "$machFile" 2>/dev/null)
    if [ -n "$entry" ]; then
        state=$(printf '%s\n' "$entry" | awk '{print $2}')
        printf '%s\n' "$machinePid $state" >> "$STATEFILE"
        continue
    fi

    # The process is gone: ask, unless 'a' was answered before.
    if [ -z "$releaseAll" ]; then
        $ECHO "Job $machinePid seems to be no longer running. Release lock? (y/a)\c"
        read -r answ
        case "${answ:-y}" in
        y)
            ;;
        a)
            releaseAll='yes'
            ;;
        *)
            state='OTHR'
            printf '%s\n' "$machinePid $state" >> "$STATEFILE"
            continue
            ;;
        esac
    fi
    releaseLock "$machinePid" "$f"
done



#- Collect old jobs in finishedJobs
#  Only regular files are candidates for removal; the finishedJobs directory
#  itself must never end up in the list.

OLDFILES=$(find "$FOAM_JOB_DIR/finishedJobs" -type f -mtime +"$NDAYSLIMIT" -print)

#- Construct state for finishedJobs and check on date of files.
#  A non-empty jobInfo file with an endDate entry is 'FINI', one without is
#  'ABRT'. Files whose endDate is more than $NDAYSLIMIT days ago are added to
#  $OLDFILES unless they are listed there already.

if notEmpty "$FOAM_JOB_DIR/finishedJobs"; then
    # Month names in the C locale, the form used in the dayDiff example
    dateNow=$(LC_ALL=C date '+%b %d %Y')
    for f in "$FOAM_JOB_DIR"/finishedJobs/*
    do
        # Skip anything that is not a non-empty regular file
        if [ ! -f "$f" ] || [ ! -s "$f" ]; then
            continue
        fi

        machinePid=$(basename "$f")

        end=$(getEntry "$f" endDate)
        if [ -z "$end" ]; then
            state='ABRT'
        else
            state='FINI'
            nDaysOld=$(dayDiff "$dateNow" "$end")
            if [ "${nDaysOld:-0}" -gt "$NDAYSLIMIT" ]; then
                # Do not count a file twice when find has listed it already
                known=''
                for old in $OLDFILES
                do
                    if [ "$old" = "$f" ]; then
                        known='yes'
                        break
                    fi
                done
                if [ -z "$known" ]; then
                    OLDFILES="$OLDFILES $f"
                fi
            fi
        fi

        printf '%s\n' "$machinePid $state" >> "$STATEFILE"
    done
fi


#- Remove old locks
#  Asks before removing the files collected in $OLDFILES; 'y' or just
#  return removes them, any other answer keeps them.

# wc may pad its output with blanks; strip them so the count is a bare number
nOldFiles=$(printf '%s\n' "$OLDFILES" | wc -w | tr -d '[:space:]')
if [ "$nOldFiles" -gt 0 ]; then
    $ECHO "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
    $ECHO "Do you want to remove these? (y)\c"
    read -r answ
    if [ "${answ:-y}" = 'y' ]; then
        # $OLDFILES is a whitespace separated list; splitting is intended.
        rm -f -- $OLDFILES
    fi
fi


#- Clean up the work file and the per-machine work directory, then report
#  where the state has been written.
rm -f -- "$TMPFILE"
# Guard against an empty value so that rm is never run without an operand
if [ -n "$MACHDIR" ]; then
    rm -rf -- "$MACHDIR"
fi

$ECHO ""
$ECHO "Updated stateFile:"
$ECHO " $STATEFILE"
$ECHO ""

#------------------------------------------------------------------------------