From 966f73a51ee524d7323c16916d5add2d589c41d1 Mon Sep 17 00:00:00 2001 From: Kerem Kayabay Date: Wed, 7 Feb 2024 17:15:18 +0100 Subject: [PATCH] steps to reproduce the container bug --- reproduce_container_bug.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 reproduce_container_bug.md diff --git a/reproduce_container_bug.md b/reproduce_container_bug.md new file mode 100644 index 0000000..2707119 --- /dev/null +++ b/reproduce_container_bug.md @@ -0,0 +1,30 @@ +Create the container on the login node: + +```bash +export WS_DIR=$(ws_find workspace_dir) # adjust this +cd $WS_DIR +wget https://fex.hlrs.de/fop/FYaJqyzw/ray.tar # download the container archive +export CONTAINER_NAME=ray +export CONTAINER_TAG=latest +export UDOCKER_DIR="$WS_DIR/.udocker/" # to store the image layers +udocker images -l # this will create a repo the first time you use it +udocker rmi $CONTAINER_NAME:$CONTAINER_TAG # results in an error since the image does not exist +udocker load -i $WS_DIR/$CONTAINER_NAME.tar $CONTAINER_NAME +rm $WS_DIR/$CONTAINER_NAME.tar # you no longer need the tar archive +``` + +Allocate a CPU node, then run the container on it: + +```bash +module load bigdata/udocker/1.3.4 +export WS_DIR=$(ws_find benchmarks) +udocker run --volume $WS_DIR:/workspace --volume /run/user/$PBS_JOBID/tmp:/tmp $CONTAINER_NAME +``` + +You should see a Python shell. + +```python +import ray +# ray.init(num_cpus=4) # Works with a small number of CPUs +ray.init() # But it can't use all the available CPUs +```