preparing the environment for linux-64 platform
parent bd2a9e7f33
commit 8474b4328d
3 changed files with 2 additions and 87 deletions
@@ -19,7 +19,7 @@ else
     echo "Environment '$CONDA_ENV_NAME' does not exist, creating it."

     # Create Conda environment
-    conda env create --name $CONDA_ENV_NAME -f environment.yaml
+    CONDA_SUBDIR=linux-64 conda env create --name $CONDA_ENV_NAME -f environment.yaml

-    conda clean --all --force-pkgs-dirs
+    conda clean -y --all --force-pkgs-dirs
 fi
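A note on the two changed commands: setting CONDA_SUBDIR=linux-64 makes conda solve and fetch linux-64 packages regardless of the platform it runs on, and the -y flag makes conda clean non-interactive so the script cannot stall on a confirmation prompt inside a batch job. A minimal sketch of the same pattern, with a placeholder environment name; the trailing conda config line, which persists the platform choice inside the environment for later installs, is an assumption and may not be needed on every conda version:

    #!/bin/bash
    # Sketch: create a linux-64 environment non-interactively.
    # CONDA_ENV_NAME is a hypothetical name; environment.yaml mirrors the
    # script above.
    CONDA_ENV_NAME="my-env"
    CONDA_SUBDIR=linux-64 conda env create --name "$CONDA_ENV_NAME" -f environment.yaml
    conda clean -y --all --force-pkgs-dirs
    # Assumed follow-up: pin the subdir inside the environment so that
    # future 'conda install' calls in it keep resolving linux-64 packages.
    conda activate "$CONDA_ENV_NAME"
    conda config --env --set subdir linux-64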
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-#Get the current workspace directory and the master node
-export CURRENT_WORKSPACE=$1
-export DASK_SCHEDULER_HOST=$2
-
-# Path to localscratch
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Worker $HOSTNAME] INFO: Setting up Dask environment"
-export DASK_ENV="/localscratch/${PBS_JOBID}/dask"
-mkdir -p $DASK_ENV
-
-# Extract Dask environment in localscratch
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Worker $HOSTNAME] INFO: Extracting Dask environment to $DASK_ENV"
-tar -xzf $CURRENT_WORKSPACE/dask-env.tar.gz -C $DASK_ENV
-chmod -R 700 $DASK_ENV
-
-# Start the dask environment
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Worker $HOSTNAME] INFO: Setting up Dask environment"
-source $DASK_ENV/bin/activate
-conda-unpack
-
-# Start Dask worker
-export DASK_SCHEDULER_PORT="8786" # Replace with the port on which the Dask scheduler is running
-
-# Additional Dask worker options can be added here if needed
-# Change local directory if memory is an issue
-
-# Change directory to localscratch and start Dask worker
-cd $DASK_ENV
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Worker $HOSTNAME] INFO: Starting Dask worker at $DASK_SCHEDULER_HOST on port $DASK_SCHEDULER_PORT"
-dask worker $DASK_SCHEDULER_HOST:$DASK_SCHEDULER_PORT
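The deleted worker script follows the usual conda-pack relocation pattern: a packed environment tarball is extracted onto node-local scratch, activated through its bin/activate, and conda-unpack rewrites the environment's hard-coded prefixes for the new location. A hedged sketch of the packing side that would have produced dask-env.tar.gz; the environment name is a placeholder and it assumes the conda-pack package is installed:

    #!/bin/bash
    # Sketch: build the relocatable tarball the worker script expects.
    # 'dask-env' is a hypothetical environment name; 'conda pack' comes from
    # the conda-pack package, which must be installed beforehand.
    conda pack -n dask-env -o dask-env.tar.gz
    # Place it in the shared workspace the job scripts read from.
    cp dask-env.tar.gz "$CURRENT_WORKSPACE/"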
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-export CURRENT_WORKSPACE=$1
-
-# Check if running in a PBS Job environment
-if [ -z ${PBS_NODEFILE+x} ]; then
-    echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] ERROR: This script is meant to run as a part of PBS Job. Don't start it at login nodes."
-    exit 1
-fi
-
-export NUM_NODES=$(wc -l < $PBS_NODEFILE)
-
-if [ $NUM_NODES -lt 2 ]; then
-    echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] WARNING: You have a single node job running. Dask cluster requires at least 2 nodes."
-    exit 1
-fi
-
-export ALL_NODES=$(cat $PBS_NODEFILE)
-export SCHEDULER_NODE="$(head -n1 $PBS_NODEFILE)-ib"
-export WORKER_NODES=$(tail -n+2 $PBS_NODEFILE)
-
-export DASK_SCHEDULER_PORT=8786
-export DASK_UI_PORT=8787
-
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Starting Dask cluster with $NUM_NODES nodes."
-# Path to localscratch
-export DASK_ENV="/localscratch/${PBS_JOBID}/dask"
-mkdir -p $DASK_ENV
-
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Extracting Dask environment to $DASK_ENV"
-# Extract Dask environment in localscratch
-tar -xzf $CURRENT_WORKSPACE/dask-env.tar.gz -C $DASK_ENV
-chmod -R 700 $DASK_ENV
-
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Setting up Dask environment"
-# Start the dask environment
-source $DASK_ENV/bin/activate
-conda-unpack
-
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Starting Dask Scheduler at $SCHEDULER_NODE on port $DASK_SCHEDULER_PORT"
-dask scheduler --host $SCHEDULER_NODE --port $DASK_SCHEDULER_PORT &
-
-export NUM_NODES=$(sort $PBS_NODEFILE |uniq | wc -l)
-
-# Assuming you have a Dask worker script named 'dask-worker-script.py', modify this accordingly
-for ((i=1;i<$NUM_NODES;i++)); do
-    echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Starting Dask Worker at $i"
-    pbsdsh -n $i -o -- bash -l -c "source $CURRENT_WORKSPACE/dask-worker.sh $CURRENT_WORKSPACE $SCHEDULER_NODE"
-done
-
-echo "[$(date '+%Y-%m-%d %H:%M:%S') - Master] INFO: Dask cluster ready, wait for workers to connect to the scheduler."
-
-# Optionally, you can provide a script for the workers to execute using ssh, similar to Spark.
-# Example: ssh $node "source activate your_conda_env && python your_dask_worker_script.py" &
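Two details of the deleted launcher are worth noting: the scheduler binds to the first node's hostname with an -ib suffix, which appears to be an InfiniBand alias on this cluster, and the node count is recomputed with sort and uniq because PBS_NODEFILE commonly lists one entry per allocated core rather than per node. A hedged sketch of the fan-out step in isolation, reusing the variables and flags from the script above:

    #!/bin/bash
    # Sketch: start one Dask worker per non-scheduler node via pbsdsh.
    # Assumes PBS_NODEFILE, CURRENT_WORKSPACE and SCHEDULER_NODE are set as
    # in the deleted script; -o launches tasks without waiting on them, as
    # the original does.
    NUM_NODES=$(sort -u "$PBS_NODEFILE" | wc -l)   # one line per core -> dedupe
    for ((i = 1; i < NUM_NODES; i++)); do          # rank 0 runs the scheduler
        pbsdsh -n "$i" -o -- bash -l -c \
            "source $CURRENT_WORKSPACE/dask-worker.sh $CURRENT_WORKSPACE $SCHEDULER_NODE"
    done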