changes regarding environment creation and deployment
This commit is contained in:
parent
79f28f8e02
commit
c2230177d2
3 changed files with 40 additions and 39 deletions
|
@ -41,8 +41,7 @@ cd deployment_scripts
|
|||
3. Package the environment and transfer the archive to the target system:
|
||||
|
||||
```bash
|
||||
(my_env) $ conda deactivate
|
||||
(base) $ conda pack -n my_env -o my_env.tar.gz # conda-pack must be installed in the base environment
|
||||
(base) $ conda pack -n <your-env> -o ray_env.tar.gz # conda-pack must be installed in the base environment
|
||||
```
|
||||
|
||||
A workspace is a suitable place to store the compressed Conda environment archive on Hawk. If you have already configured a workspace, proceed to the next step. Otherwise, use the following command to create a workspace on the high-performance filesystem; the workspace will expire in 10 days. For more information, such as how to enable reminder emails, refer to the [workspace mechanism](https://kb.hlrs.de/platforms/index.php/Workspace_mechanism) guide.
|
||||
|
@ -55,8 +54,8 @@ ws_find hpda_project # find the path to workspace, which is the destination dire
|
|||
You can send your data to an existing workspace using:
|
||||
|
||||
```bash
|
||||
scp my_env.tar.gz <username>@hawk.hww.hlrs.de:<workspace_directory>
|
||||
rm my_env.tar.gz # We don't need the archive locally anymore.
|
||||
scp ray_env.tar.gz <username>@hawk.hww.hlrs.de:<workspace_directory>
|
||||
rm ray_env.tar.gz # We don't need the archive locally anymore.
|
||||
```
|
||||
|
||||
4. Clone the repository on Hawk to use the deployment scripts and project structure:
|
||||
|
@ -79,7 +78,7 @@ qsub -I -l select=1:node_type=rome -l walltime=01:00:00
|
|||
2. Go into the directory with all code:
|
||||
|
||||
```bash
|
||||
cd <source_directory>/deployment_scripts
|
||||
cd <project_directory>/deployment_scripts
|
||||
```
|
||||
|
||||
3. Deploy the Conda environment to the RAM disk:
|
||||
|
|
|
@ -20,6 +20,4 @@ else
|
|||
|
||||
# Create Conda environment
|
||||
CONDA_SUBDIR=linux-64 conda env create --name $CONDA_ENV_NAME -f environment.yaml
|
||||
|
||||
conda clean -y --all --force-pkgs-dirs
|
||||
fi
|
|
@ -1,41 +1,45 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Check if a destination and environment name are provided
|
||||
if [ "$#" -ne 2 ]; then
|
||||
echo "Usage: $0 <environment_name> <destination_directory>"
|
||||
exit 1
|
||||
fi
|
||||
export WS_DIR=<workspace_dir>
|
||||
|
||||
# Name of the Conda environment
|
||||
CONDA_ENV_NAME="$1"
|
||||
TAR_FILE="$CONDA_ENV_NAME.tar.gz"
|
||||
# Get the first character of the hostname
|
||||
first_char=$(hostname | cut -c1)
|
||||
|
||||
# Check if the tar.gz file already exists
|
||||
if [ -e "$TAR_FILE" ]; then
|
||||
echo "Using existing $TAR_FILE"
|
||||
# Check if the first character is not "r"
|
||||
if [[ $first_char != "r" ]]; then
|
||||
# it's not a cpu node.
|
||||
echo "Hostname does not start with 'r'."
|
||||
# Get the first seven characters of the hostname
|
||||
first_seven_chars=$(hostname | cut -c1,2,3,4,5,6,7)
|
||||
# Check if it is an ai node
|
||||
if [[ $first_seven_chars != "hawk-ai" ]]; then
|
||||
echo "Hostname does not start with 'hawk-ai' too. Exiting."
|
||||
return 1
|
||||
else
|
||||
# Pack the Conda environment if the file doesn't exist
|
||||
conda pack -n "$CONDA_ENV_NAME" -o "$TAR_FILE"
|
||||
echo "GPU node detected."
|
||||
export OBJ_STR_MEMORY=350000000000
|
||||
export TEMP_CHECKPOINT_DIR=/localscratch/$PBS_JOBID/model_checkpoints/
|
||||
mkdir -p $TEMP_CHECKPOINT_DIR
|
||||
fi
|
||||
|
||||
# Parse the destination host and directory
|
||||
DESTINATION=$2
|
||||
IFS=':' read -ra DEST <<< "$DESTINATION"
|
||||
DEST_HOST="${DEST[0]}"
|
||||
DEST_DIR="${DEST[1]}"
|
||||
|
||||
# Copy the environment tarball to the remote server
|
||||
scp "$TAR_FILE" "$DEST_HOST":"$DEST_DIR"
|
||||
scp deploy-dask.sh "$DEST_HOST":"$DEST_DIR"
|
||||
scp dask-worker.sh "$DEST_HOST":"$DEST_DIR"
|
||||
|
||||
echo "Conda environment '$CONDA_ENV_NAME' packed and deployed to '$DEST_HOST:$DEST_DIR' as '$TAR_FILE'."
|
||||
|
||||
# Ask the user if they want to delete the tar.gz file
|
||||
read -p "Do you want to delete the local tar.gz file? (y/n): " answer
|
||||
if [ "$answer" == "y" ]; then
|
||||
rm "$TAR_FILE"
|
||||
echo "Local tar.gz file deleted."
|
||||
else
|
||||
echo "Local tar.gz file not deleted."
|
||||
echo "CPU node detected."
|
||||
fi
|
||||
|
||||
module load bigdata/conda
|
||||
|
||||
export RAY_DEDUP_LOGS=0
|
||||
|
||||
export ENV_ARCHIVE=ray_env.tar.gz
|
||||
export CONDA_ENVS=/run/user/$PBS_JOBID/envs
|
||||
export ENV_NAME=ray_env
|
||||
export ENV_PATH=$CONDA_ENVS/$ENV_NAME
|
||||
|
||||
mkdir -p $ENV_PATH
|
||||
|
||||
tar -xzf $WS_DIR/$ENV_ARCHIVE -C $ENV_PATH
|
||||
|
||||
source $ENV_PATH/bin/activate
|
||||
|
||||
# Point conda at the directory holding the unpacked environments. The value must
# be the expansion of CONDA_ENVS, not the literal word "CONDA_ENVS".
export CONDA_ENVS_PATH="$CONDA_ENVS"
|
||||
|
||||
conda_unpack
|
Loading…
Reference in a new issue