Versions Compared


  • This line was added.
  • This line was removed.
  • Formatting was changed.



Code Block
titleExample 1 : One process with a single GPU using shared node access
#!/bin/bash --login
# Slurm batch script: one task driving one GPU, without exclusive node access.

#SBATCH --account=project-gpu
#SBATCH --partition=gpu
#SBATCH --nodes=1              # a single node is requested
#SBATCH --gpus-per-node=1      # one GPU on that node (one "allocation pack" for the whole job)
#SBATCH --time=00:05:00

# Software environment -- swap in the modules your own application needs.
module load PrgEnv-cray
module load rocm craype-accel-amd-gfx90a
module list

# MPI & OpenMP settings.
# OMP_NUM_THREADS sets the real number of threads each task runs.
export OMP_NUM_THREADS=1

# Launch: 1 node, 1 task, 8 CPUs for that task, 1 GPU on the node.
srun \
  --nodes=1 \
  --ntasks=1 \
  --cpus-per-task=8 \
  --gpus-per-node=1 \
  ./program

Code Block
titleExample 2 : Single CPU process that uses the eight GPUs of the node
#!/bin/bash --login
# Slurm batch script: one task that is given all eight GPUs of an exclusive node.

#SBATCH --account=project-gpu
#SBATCH --partition=gpu
#SBATCH --nodes=1              # a single node is requested
#SBATCH --exclusive            # the whole node is reserved for this job,
#                              # i.e. 8 GPUs per node (8 "allocation packs" in total for the job)
#SBATCH --time=00:05:00

# Software environment -- swap in the modules your own application needs.
module load PrgEnv-cray
module load rocm craype-accel-amd-gfx90a
module list

# MPI & OpenMP settings.
# OMP_NUM_THREADS sets the real number of CPU cores (threads) the executable uses per task.
export OMP_NUM_THREADS=1

# Launch: 1 node, 1 task, 64 CPUs for that task, all 8 GPUs assigned to that task.
srun \
  --nodes=1 \
  --ntasks=1 \
  --cpus-per-task=64 \
  --gpus-per-node=8 \
  --gpus-per-task=8 \
  ./program

Code Block
titleExample 3 : Eight MPI processes each with a single GPU (use exclusive node access)
#!/bin/bash --login
# Slurm batch script: eight MPI tasks on one exclusive node, one GPU per task.

#SBATCH --account=project-gpu
#SBATCH --partition=gpu
#SBATCH --nodes=1              # a single node is requested
#SBATCH --exclusive            # the whole node is reserved for this job,
#                              # i.e. 8 GPUs per node (8 "allocation packs" in total for the job)
#SBATCH --time=00:05:00

# Software environment -- swap in the modules your own application needs.
module load PrgEnv-cray
module load rocm craype-accel-amd-gfx90a
module list

# MPI & OpenMP settings.
# MPICH_GPU_SUPPORT_ENABLED=1 turns on GPU-aware MPI communication among GPUs.
export MPICH_GPU_SUPPORT_ENABLED=1
# OMP_NUM_THREADS sets the real number of threads each task runs.
export OMP_NUM_THREADS=1

# Launch: 1 node, 8 tasks, 8 CPUs and 1 GPU per task; --gpu-bind=closest asks
# Slurm to bind each task to the GPU closest to its allocated CPUs.
srun \
  --nodes=1 \
  --ntasks=8 \
  --cpus-per-task=8 \
  --gpus-per-node=8 \
  --gpus-per-task=1 \
  --gpu-bind=closest \
  ./program

titleMethod 1 may fail for some applications.

The use of --gpu-bind=closest may not work for all codes. For those codes, "manual" binding may be the only reliable method if they rely on OpenMP or OpenACC pragmas to move data between host and GPU while also attempting to use GPU-to-GPU enabled MPI communication.


Full guides