ray_template/deployment_scripts/deploy-env.sh

43 lines
1.1 KiB
Bash
Raw Permalink Normal View History

2023-12-07 09:26:25 +00:00
#!/bin/bash
# Prepares the environment of a job node: sets the workspace location, picks
# node-specific settings from the hostname, and unpacks/activates the packed
# conda environment.
# The script uses top-level `return` and `source`, so it appears to be meant
# to be sourced rather than executed — confirm against the caller.

# Workspace directory; the environment archive is read from here further down.
# NOTE(review): `<workspace_dir>` looks like a template placeholder that must
# be replaced with a real path before use — as written, bash would parse the
# `<` and `>` as redirections. Confirm how the template is instantiated.
export WS_DIR=<workspace_dir>
2023-12-07 09:26:25 +00:00
# Classify the node from its hostname prefix:
#   r*        -> treated as a CPU node (no extra settings)
#   hawk-ai*  -> treated as a GPU node (object-store size + checkpoint dir)
#   otherwise -> setup is aborted with status 1
# `hostname` is queried once; the one-character prefix is derived from the
# seven-character prefix.
first_seven_chars=$(hostname | cut -c1-7)
first_char=${first_seven_chars:0:1}

if [[ "$first_char" != "r" ]]; then
  # Not a CPU node.
  echo "Hostname does not start with 'r'."
  if [[ "$first_seven_chars" != "hawk-ai" ]]; then
    echo "Hostname does not start with 'hawk-ai' too. Exiting." >&2
    # `return` only succeeds when this file is sourced (or inside a function).
    # Fall back to `exit` so a directly executed copy also stops here instead
    # of carrying on with the environment setup.
    return 1 2>/dev/null || exit 1
  else
    echo "GPU node detected."
    # 350000000000 = 3.5e11; presumably the object-store size in bytes read
    # by the cluster start-up scripts — TODO confirm against the consumer.
    export OBJ_STR_MEMORY=350000000000
    # Per-job checkpoint directory under /localscratch, keyed by the PBS job id.
    export TEMP_CHECKPOINT_DIR="/localscratch/${PBS_JOBID}/model_checkpoints/"
    mkdir -p "$TEMP_CHECKPOINT_DIR"
  fi
else
  echo "CPU node detected."
fi
# Unpack the archived conda environment into a per-job directory and
# activate it in the current shell.
module load bigdata/conda

# Disable Ray's de-duplication of repeated log lines.
export RAY_DEDUP_LOGS=0

# Archive name (looked up inside $WS_DIR) and the per-job target location.
export ENV_ARCHIVE=ray_env.tar.gz
export CONDA_ENVS="/run/user/${PBS_JOBID}/envs"
export ENV_NAME=ray_env
export ENV_PATH="${CONDA_ENVS}/${ENV_NAME}"

mkdir -p "$ENV_PATH"
tar -xzf "$WS_DIR/$ENV_ARCHIVE" -C "$ENV_PATH"
source "$ENV_PATH/bin/activate"

# Point conda at the directory holding the unpacked environment. The value
# must be the expanded path; without the `$` conda would be handed the literal
# string "CONDA_ENVS".
export CONDA_ENVS_PATH="$CONDA_ENVS"