From c2230177d28b11418da7128b1a94e0b12f408076 Mon Sep 17 00:00:00 2001 From: Kerem Kayabay Date: Fri, 5 Jan 2024 13:44:48 +0100 Subject: [PATCH] changes regarding environment creation and deployment --- README.md | 9 ++--- deployment_scripts/create-env.sh | 2 - deployment_scripts/deploy-env.sh | 68 +++++++++++++++++--------------- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 1c569af..6bb972a 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,7 @@ cd deployment_scripts 3. Package the environment and transfer the archive to the target system: ```bash -(my_env) $ conda deactivate -(base) $ conda pack -n my_env -o my_env.tar.gz # conda-pack must be installed in the base environment +(base) $ conda pack -n -o ray_env.tar.gz # conda-pack must be installed in the base environment ``` A workspace is suitable to store the compressed Conda environment archive on Hawk. Proceed to the next step if you have already configured your workspace. Use the following command to create a workspace on the high-performance filesystem, which will expire in 10 days. For more information, such as how to enable reminder emails, refer to the [workspace mechanism](https://kb.hlrs.de/platforms/index.php/Workspace_mechanism) guide. @@ -55,8 +54,8 @@ ws_find hpda_project # find the path to workspace, which is the destination dire You can send your data to an existing workspace using: ```bash -scp my_env.tar.gz @hawk.hww.hlrs.de: -rm my_env.tar.gz # We don't need the archive locally anymore. +scp ray_env.tar.gz @hawk.hww.hlrs.de: +rm ray_env.tar.gz # We don't need the archive locally anymore. ``` 4. Clone the repository on Hawk to use the deployment scripts and project structure: @@ -79,7 +78,7 @@ qsub -I -l select=1:node_type=rome -l walltime=01:00:00 2. Go into the directory with all code: ```bash -cd /deployment_scripts +cd /deployment_scripts ``` 3. 
Deploy the conda environment to the ram disk: diff --git a/deployment_scripts/create-env.sh b/deployment_scripts/create-env.sh index 5b586a8..1d87727 100755 --- a/deployment_scripts/create-env.sh +++ b/deployment_scripts/create-env.sh @@ -20,6 +20,4 @@ else # Create Conda environment CONDA_SUBDIR=linux-64 conda env create --name $CONDA_ENV_NAME -f environment.yaml - - conda clean -y --all --force-pkgs-dirs fi \ No newline at end of file diff --git a/deployment_scripts/deploy-env.sh b/deployment_scripts/deploy-env.sh index 913269b..0fc23e3 100755 --- a/deployment_scripts/deploy-env.sh +++ b/deployment_scripts/deploy-env.sh @@ -1,41 +1,45 @@ #!/bin/bash -# Check if a destination and environment name are provided -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - exit 1 +export WS_DIR= + +# Get the first character of the hostname +first_char=$(hostname | cut -c1) + +# Check if the first character is not "r" +if [[ $first_char != "r" ]]; then + # it's not a cpu node. + echo "Hostname does not start with 'r'." + # Get the first seven characters of the hostname + first_seven_chars=$(hostname | cut -c1,2,3,4,5,6,7) + # Check if it is an ai node + if [[ $first_seven_chars != "hawk-ai" ]]; then + echo "Hostname does not start with 'hawk-ai' too. Exiting." + return 1 + else + echo "GPU node detected." + export OBJ_STR_MEMORY=350000000000 + export TEMP_CHECKPOINT_DIR=/localscratch/$PBS_JOBID/model_checkpoints/ + mkdir -p $TEMP_CHECKPOINT_DIR + fi +else + echo "CPU node detected." 
fi -# Name of the Conda environment -CONDA_ENV_NAME="$1" -TAR_FILE="$CONDA_ENV_NAME.tar.gz" +module load bigdata/conda -# Check if the tar.gz file already exists -if [ -e "$TAR_FILE" ]; then - echo "Using existing $TAR_FILE" -else - # Pack the Conda environment if the file doesn't exist - conda pack -n "$CONDA_ENV_NAME" -o "$TAR_FILE" -fi +export RAY_DEDUP_LOGS=0 -# Parse the destination host and directory -DESTINATION=$2 -IFS=':' read -ra DEST <<< "$DESTINATION" -DEST_HOST="${DEST[0]}" -DEST_DIR="${DEST[1]}" +export ENV_ARCHIVE=ray_env.tar.gz +export CONDA_ENVS=/run/user/$PBS_JOBID/envs +export ENV_NAME=ray_env +export ENV_PATH=$CONDA_ENVS/$ENV_NAME -# Copy the environment tarball to the remote server -scp "$TAR_FILE" "$DEST_HOST":"$DEST_DIR" -scp deploy-dask.sh "$DEST_HOST":"$DEST_DIR" -scp dask-worker.sh "$DEST_HOST":"$DEST_DIR" +mkdir -p $ENV_PATH -echo "Conda environment '$CONDA_ENV_NAME' packed and deployed to '$DEST_HOST:$DEST_DIR' as '$TAR_FILE'." +tar -xzf $WS_DIR/$ENV_ARCHIVE -C $ENV_PATH -# Ask the user if they want to delete the tar.gz file -read -p "Do you want to delete the local tar.gz file? (y/n): " answer -if [ "$answer" == "y" ]; then - rm "$TAR_FILE" - echo "Local tar.gz file deleted." -else - echo "Local tar.gz file not deleted." -fi \ No newline at end of file +source $ENV_PATH/bin/activate + +export CONDA_ENVS_PATH=$CONDA_ENVS + +conda_unpack \ No newline at end of file