forked from SiVeGCS/dask_template

Commit c2230177d2 (parent 79f28f8e02): changes regarding environment creation and deployment

3 changed files with 40 additions and 39 deletions
@@ -41,8 +41,7 @@ cd deployment_scripts
 3. Package the environment and transfer the archive to the target system:
 
 ```bash
-(my_env) $ conda deactivate
-(base) $ conda pack -n my_env -o my_env.tar.gz # conda-pack must be installed in the base environment
+(base) $ conda pack -n <your-env> -o ray_env.tar.gz # conda-pack must be installed in the base environment
 ```
 
 A workspace is suitable for storing the compressed Conda environment archive on Hawk. Proceed to the next step if you have already configured your workspace. Use the following command to create a workspace on the high-performance filesystem, which will expire in 10 days. For more information, such as how to enable reminder emails, refer to the [workspace mechanism](https://kb.hlrs.de/platforms/index.php/Workspace_mechanism) guide.
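The `ws_allocate` command that the paragraph above refers to sits outside this hunk. A minimal sketch, assuming the workspace name `hpda_project` that appears later in this README and the 10-day lifetime mentioned above:

```bash
# Allocate a workspace that expires in 10 days (name and lifetime assumed)
ws_allocate hpda_project 10
# Print the workspace path; it is the destination directory for the scp below
ws_find hpda_project
```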
@@ -55,8 +54,8 @@ ws_find hpda_project # find the path to workspace, which is the destination directory
 You can send your data to an existing workspace using:
 
 ```bash
-scp my_env.tar.gz <username>@hawk.hww.hlrs.de:<workspace_directory>
-rm my_env.tar.gz # We don't need the archive locally anymore.
+scp ray_env.tar.gz <username>@hawk.hww.hlrs.de:<workspace_directory>
+rm ray_env.tar.gz # We don't need the archive locally anymore.
 ```
 
 4. Clone the repository on Hawk to use the deployment scripts and project structure:
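The clone command itself falls outside this hunk; a sketch of the step, with the repository URL left as a placeholder since it is not shown in this diff:

```bash
# Clone the project on Hawk and enter the deployment scripts (placeholder URL)
git clone <repository_url>
cd <project_directory>/deployment_scripts
```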
@@ -79,7 +78,7 @@ qsub -I -l select=1:node_type=rome -l walltime=01:00:00
 2. Go into the directory with all code:
 
 ```bash
-cd <source_directory>/deployment_scripts
+cd <project_directory>/deployment_scripts
 ```
 
 3. Deploy the conda environment to the ram disk:
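The deployment command is cut off by the hunk boundary. Note that the rewritten deployment script later in this diff activates the environment in the calling shell and bails out with `return` rather than `exit`, so it is presumably meant to be sourced; a sketch, assuming the script is named `deploy-env.sh`:

```bash
# Sourcing keeps the activated Conda environment in the current shell (filename assumed)
source deploy-env.sh
```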
@@ -20,6 +20,4 @@ else
 
 # Create Conda environment
 CONDA_SUBDIR=linux-64 conda env create --name $CONDA_ENV_NAME -f environment.yaml
-
-conda clean -y --all --force-pkgs-dirs
 fi
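The creation script consumes an `environment.yaml` that is not part of this diff. A hypothetical minimal version, written inline to show the mechanism; the real dependency list lives in the repository:

```bash
# Illustrative environment.yaml only; the actual file ships with the repository
cat > environment.yaml <<'EOF'
name: ray_env
channels:
  - conda-forge
dependencies:
  - python=3.10
  # project dependencies (e.g. Ray) go here
EOF
# CONDA_SUBDIR=linux-64 resolves linux-64 packages even from a non-Linux client
CONDA_SUBDIR=linux-64 conda env create --name ray_env -f environment.yaml
```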
@@ -1,41 +1,45 @@
 #!/bin/bash
 
-# Check if a destination and environment name are provided
-if [ "$#" -ne 2 ]; then
-    echo "Usage: $0 <environment_name> <destination_directory>"
-    exit 1
-fi
-
-# Name of the Conda environment
-CONDA_ENV_NAME="$1"
-TAR_FILE="$CONDA_ENV_NAME.tar.gz"
-
-# Check if the tar.gz file already exists
-if [ -e "$TAR_FILE" ]; then
-    echo "Using existing $TAR_FILE"
-else
-    # Pack the Conda environment if the file doesn't exist
-    conda pack -n "$CONDA_ENV_NAME" -o "$TAR_FILE"
-fi
-
-# Parse the destination host and directory
-DESTINATION=$2
-IFS=':' read -ra DEST <<< "$DESTINATION"
-DEST_HOST="${DEST[0]}"
-DEST_DIR="${DEST[1]}"
-
-# Copy the environment tarball to the remote server
-scp "$TAR_FILE" "$DEST_HOST":"$DEST_DIR"
-scp deploy-dask.sh "$DEST_HOST":"$DEST_DIR"
-scp dask-worker.sh "$DEST_HOST":"$DEST_DIR"
-
-echo "Conda environment '$CONDA_ENV_NAME' packed and deployed to '$DEST_HOST:$DEST_DIR' as '$TAR_FILE'."
-
-# Ask the user if they want to delete the tar.gz file
-read -p "Do you want to delete the local tar.gz file? (y/n): " answer
-if [ "$answer" == "y" ]; then
-    rm "$TAR_FILE"
-    echo "Local tar.gz file deleted."
-else
-    echo "Local tar.gz file not deleted."
-fi
+export WS_DIR=<workspace_dir>
+
+# Get the first character of the hostname
+first_char=$(hostname | cut -c1)
+
+# Check if the first character is not "r"
+if [[ $first_char != "r" ]]; then
+    # It's not a CPU node.
+    echo "Hostname does not start with 'r'."
+    # Get the first seven characters of the hostname
+    first_seven_chars=$(hostname | cut -c1-7)
+    # Check if it is an AI node
+    if [[ $first_seven_chars != "hawk-ai" ]]; then
+        echo "Hostname does not start with 'hawk-ai' either. Exiting."
+        return 1
+    else
+        echo "GPU node detected."
+        export OBJ_STR_MEMORY=350000000000
+        export TEMP_CHECKPOINT_DIR=/localscratch/$PBS_JOBID/model_checkpoints/
+        mkdir -p $TEMP_CHECKPOINT_DIR
+    fi
+else
+    echo "CPU node detected."
+fi
+
+module load bigdata/conda
+
+export RAY_DEDUP_LOGS=0
+
+export ENV_ARCHIVE=ray_env.tar.gz
+export CONDA_ENVS=/run/user/$PBS_JOBID/envs
+export ENV_NAME=ray_env
+export ENV_PATH=$CONDA_ENVS/$ENV_NAME
+
+mkdir -p $ENV_PATH
+
+tar -xzf $WS_DIR/$ENV_ARCHIVE -C $ENV_PATH
+
+source $ENV_PATH/bin/activate
+
+export CONDA_ENVS_PATH=$CONDA_ENVS
+
+conda-unpack # fix prefix paths inside the unpacked environment
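Taken together, a hypothetical end-to-end run of the rewritten script on Hawk, combining the qsub line and directory layout from the README hunks above (script filename assumed):

```bash
qsub -I -l select=1:node_type=rome -l walltime=01:00:00   # interactive CPU node
cd <project_directory>/deployment_scripts
# Edit WS_DIR in the script to the workspace holding ray_env.tar.gz, then:
source deploy-env.sh                                      # filename assumed
python -c "import ray; print(ray.__version__)"            # sanity check
```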